From 17b94a2d30a3384113b35baada69c98e13bb7b93 Mon Sep 17 00:00:00 2001
From: CentOS Sources
Date: Nov 03 2021 19:11:58 +0000
Subject: import glusterfs-6.0-57.4.el9

---

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..134666a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+SOURCES/glusterfs-6.0.tar.gz
diff --git a/.glusterfs.metadata b/.glusterfs.metadata
new file mode 100644
index 0000000..98d5fc3
--- /dev/null
+++ b/.glusterfs.metadata
@@ -0,0 +1 @@
+c9d75f37e00502a10f64cd4ba9aafb17552e0800 SOURCES/glusterfs-6.0.tar.gz
diff --git a/SOURCES/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch b/SOURCES/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch
new file mode 100644
index 0000000..9ca880d
--- /dev/null
+++ b/SOURCES/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch
@@ -0,0 +1,1557 @@
+From 78060c16f88594b3424e512a9ef0e4a8f56e88c3 Mon Sep 17 00:00:00 2001
+From: Kaushal M
+Date: Thu, 6 Dec 2018 15:04:16 +0530
+Subject: [PATCH 02/52] glusterd: fix op-versions for RHS backwards
+ compatibility
+
+Backport of https://code.engineering.redhat.com/gerrit/#/c/60485/
+
+This change fixes the op-version of different features and checks to maintain
+backwards compatibility with RHS-3.0 and before.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Icb282444da179b12fbd6ed9f491514602f1a38c2
+Signed-off-by: Atin Mukherjee
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70348
+---
+ libglusterfs/src/glusterfs/globals.h | 45 +++--
+ rpc/rpc-transport/socket/src/socket.c | 4 +-
+ xlators/cluster/dht/src/dht-shared.c | 6 +-
+ xlators/debug/io-stats/src/io-stats.c | 16 +-
+ xlators/features/barrier/src/barrier.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 14 +-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 16 +-
+ xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 6 +-
+ xlators/mgmt/glusterd/src/glusterd-sm.c | 2 +-
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 12 +-
+ xlators/mgmt/glusterd/src/glusterd-snapshot.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-store.c | 27 +--
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-tier.c | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 12 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 192 +++++++++++----------
+ xlators/protocol/client/src/client.c | 4 +-
+ xlators/protocol/server/src/server.c | 6 +-
+ xlators/storage/posix/src/posix-common.c | 4 +-
+ 24 files changed, 214 insertions(+), 197 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 8d898c3..b9da872 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -23,23 +23,28 @@
+ #define GF_AVOID_OVERWRITE "glusterfs.avoid.overwrite"
+ #define GF_CLEAN_WRITE_PROTECTION "glusterfs.clean.writexattr"
+
+-/* Gluster versions - OP-VERSION mapping
++/* RHS versions - OP-VERSION mapping
+ *
+- * 3.3.x - 1
+- * 3.4.x - 2
+- * 3.5.0 - 3
+- * 3.5.1 - 30501
+- * 3.6.0 - 30600
+- * 3.7.0 - 30700
+- * 3.7.1 - 30701
+- * 3.7.2 - 30702
++ * RHS-2.0 Z - 1
++ * RHS-2.1 Z - 2
++ * RHS-2.1 u5 - 20105
++ * RHS-3.0 - 30000
++ * RHS-3.0.4 - 30004
++ * RHGS-3.1 - 30702
+ *
+- * Starting with Gluster v3.6, the op-version
will be multi-digit integer values +- * based on the Glusterfs version, instead of a simply incrementing integer +- * value. The op-version for a given X.Y.Z release will be an integer XYZ, with +- * Y and Z 2 digit always 2 digits wide and padded with 0 when needed. This +- * should allow for some gaps between two Y releases for backports of features +- * in Z releases. ++ * ++ * NOTE: ++ * Starting with RHS-3.0, the op-version will be multi-digit integer values ++ * based on the RHS version, instead of a simply incrementing integer value. The ++ * op-version for a given RHS X(Major).Y(Minor).Z(Update) release will be an ++ * integer with digits XYZ. The Y and Z values will be 2 digits wide always ++ * padded with 0 as needed. This should allow for some gaps between two Y ++ * releases for backports of features in Z releases. ++ * ++ * NOTE: ++ * Starting with RHGS-3.1, the op-version will be the same as the upstream ++ * GlusterFS op-versions. This is to allow proper access to upstream clients of ++ * version 3.7.x or greater, proper access to the RHGS volumes. + */ + #define GD_OP_VERSION_MIN \ + 1 /* MIN is the fresh start op-version, mostly \ +@@ -51,7 +56,13 @@ + introduction of newer \ + versions */ + +-#define GD_OP_VERSION_3_6_0 30600 /* Op-Version for GlusterFS 3.6.0 */ ++#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */ ++ ++#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_RHS_3_0 ++ ++#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */ ++ ++#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */ + + #define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */ + +@@ -115,8 +126,6 @@ + + #define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */ + +-#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0 +- + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index fa0e0f2..121d46b 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4704,7 +4704,7 @@ struct volume_options options[] = { + .description = "SSL CA list. Ignored if SSL is not enabled."}, + {.key = {"ssl-cert-depth"}, + .type = GF_OPTION_TYPE_INT, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Maximum certificate-chain depth. If zero, the " + "peer's certificate itself must be in the local " +@@ -4713,7 +4713,7 @@ struct volume_options options[] = { + "local list. Ignored if SSL is not enabled."}, + {.key = {"ssl-cipher-list"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Allowed SSL ciphers. Ignored if SSL is not enabled."}, + {.key = {"ssl-dh-param"}, +diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c +index c7ef2f1..ea4b7c6 100644 +--- a/xlators/cluster/dht/src/dht-shared.c ++++ b/xlators/cluster/dht/src/dht-shared.c +@@ -1064,7 +1064,7 @@ struct volume_options dht_options[] = { + "When enabled, files will be allocated to bricks " + "with a probability proportional to their size. 
Otherwise, all " + "bricks will have the same probability (legacy behavior).", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + +@@ -1161,7 +1161,7 @@ struct volume_options dht_options[] = { + "from which hash ranges are allocated starting with 0. " + "Note that we still use a directory/file's name to determine the " + "subvolume to which it hashes", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + }, + + {.key = {"rebal-throttle"}, +@@ -1174,7 +1174,7 @@ struct volume_options dht_options[] = { + "migrated at a time. Lazy will allow only one file to " + "be migrated at a time and aggressive will allow " + "max of [($(processing units) - 4) / 2), 4]", +- .op_version = {GD_OP_VERSION_3_7_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC + +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index f12191f..41b57c5 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -4333,7 +4333,7 @@ struct volume_options options[] = { + .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}}, + {.key = {"client-logger"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .default_value = GF_LOGGER_GLUSTER_LOG, +@@ -4342,7 +4342,7 @@ struct volume_options options[] = { + .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}}, + {.key = {"brick-logger"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .default_value = GF_LOGGER_GLUSTER_LOG, +@@ -4354,7 +4354,7 @@ struct volume_options options[] = { + .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}}, + {.key = {"client-log-format"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .default_value = GF_LOG_FORMAT_WITH_MSG_ID, +@@ -4362,7 +4362,7 @@ struct volume_options options[] = { + .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}}, + {.key = {"brick-log-format"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .default_value = GF_LOG_FORMAT_WITH_MSG_ID, +@@ -4377,7 +4377,7 @@ struct volume_options options[] = { + }, + {.key = {"client-log-buf-size"}, + .type = GF_OPTION_TYPE_INT, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .min = GF_LOG_LRU_BUFSIZE_MIN, +@@ -4388,7 +4388,7 @@ struct volume_options options[] = { + " the value of the option client-log-flush-timeout."}, + {.key = {"brick-log-buf-size"}, + .type = GF_OPTION_TYPE_INT, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .min = GF_LOG_LRU_BUFSIZE_MIN, +@@ -4406,7 +4406,7 @@ struct volume_options options[] = { + }, + {.key = 
{"client-log-flush-timeout"}, + .type = GF_OPTION_TYPE_TIME, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .min = GF_LOG_FLUSH_TIMEOUT_MIN, +@@ -4417,7 +4417,7 @@ struct volume_options options[] = { + " the value of the option client-log-flush-timeout."}, + {.key = {"brick-log-flush-timeout"}, + .type = GF_OPTION_TYPE_TIME, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .min = GF_LOG_FLUSH_TIMEOUT_MIN, +diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c +index a601c7f..0923992 100644 +--- a/xlators/features/barrier/src/barrier.c ++++ b/xlators/features/barrier/src/barrier.c +@@ -774,7 +774,7 @@ struct volume_options options[] = { + {.key = {"barrier"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "disable", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "When \"enabled\", blocks acknowledgements to application " + "for file operations such as rmdir, rename, unlink, " +@@ -784,7 +784,7 @@ struct volume_options options[] = { + {.key = {"barrier-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = BARRIER_TIMEOUT, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "After 'timeout' seconds since the time 'barrier' " + "option was set to \"on\", acknowledgements to file " +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index 38483a1..ad9a572 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -1195,7 +1195,7 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count, + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + brick_mount_dir = NULL; + + snprintf(key, sizeof(key), "brick%d.mount_dir", i); +@@ -1729,7 +1729,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + ret = glusterd_get_brick_mount_dir( + brickinfo->path, brickinfo->hostname, brickinfo->mount_dir); + if (ret) { +@@ -2085,12 +2085,12 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + } + + /* Check if the connected clients are all of version +- * glusterfs-3.6 and higher. This is needed to prevent some data ++ * RHS-2.1u5 and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. 
+ */ + ret = glusterd_check_client_op_version_support( +- volname, GD_OP_VERSION_3_6_0, NULL); ++ volname, GD_OP_VERSION_RHS_2_1_5, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index f754b52..387643d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -763,7 +763,7 @@ glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + } + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) { + ret = glusterd_lock(MY_UUID); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL, +@@ -818,7 +818,7 @@ glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + local_locking_done: + /* If no volname is given as a part of the command, locks will + * not be held, hence sending stage event. */ +- if (volname || (priv->op_version < GD_OP_VERSION_3_6_0)) ++ if (volname || (priv->op_version < GD_OP_VERSION_RHS_3_0)) + event_type = GD_OP_EVENT_START_LOCK; + else { + txn_op_info.state.state = GD_OP_STATE_LOCK_SENT; +@@ -849,7 +849,7 @@ out: + if (locked && ret) { + /* Based on the op-version, we release the + * cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) + glusterd_unlock(MY_UUID); + else { + ret = glusterd_mgmt_v3_unlock(volname, MY_UUID, "vol"); +@@ -4432,12 +4432,12 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req) + } + + if ((cmd & GF_CLI_STATUS_SNAPD) && +- (conf->op_version < GD_OP_VERSION_3_6_0)) { ++ (conf->op_version < GD_OP_VERSION_RHS_3_0)) { + snprintf(err_str, sizeof(err_str), + "The cluster is operating " + "at a lesser version than %d. Getting the status of " + "snapd is not allowed in this state", +- GD_OP_VERSION_3_6_0); ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +@@ -4459,7 +4459,7 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req) + "The cluster is operating " + "at a lesser version than %d. 
Getting the status of " + "tierd is not allowed in this state", +- GD_OP_VERSION_3_6_0); ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +@@ -6430,7 +6430,7 @@ __glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata, + glusterd_friend_sm_state_name_get(peerinfo->state.state)); + + if (peerinfo->connected) { +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + glusterd_get_lock_owner(&uuid); + if (!gf_uuid_is_null(uuid) && + !gf_uuid_compare(peerinfo->uuid, uuid)) +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 6495a9d..dd3f9eb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -194,7 +194,7 @@ glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id) + if (!*txn_id) + goto out; + +- if (priv->op_version < GD_OP_VERSION_3_6_0) ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) + gf_uuid_copy(**txn_id, priv->global_txn_id); + else + gf_uuid_generate(**txn_id); +@@ -1864,12 +1864,12 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) + } + + if ((cmd & GF_CLI_STATUS_SNAPD) && +- (priv->op_version < GD_OP_VERSION_3_6_0)) { ++ (priv->op_version < GD_OP_VERSION_RHS_3_0)) { + snprintf(msg, sizeof(msg), + "The cluster is operating at " + "version less than %d. Getting the " + "status of snapd is not allowed in this state.", +- GD_OP_VERSION_3_6_0); ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +@@ -3877,7 +3877,7 @@ glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx) + continue; + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; + if (proc->fn) { + ret = proc->fn(NULL, this, peerinfo); +@@ -3980,7 +3980,7 @@ glusterd_op_ac_send_unlock(glusterd_op_sm_event_t *event, void *ctx) + continue; + /* Based on the op_version, + * release the cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; + if (proc->fn) { + ret = proc->fn(NULL, this, peerinfo); +@@ -4957,7 +4957,7 @@ glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx) + count = brick_index_max + other_count + 1; + + /* +- * a glusterd lesser than version 3.7 will be sending the ++ * a glusterd lesser than version RHS-3.0.4 will be sending the + * rdma port in older key. Changing that value from here + * to support backward compatibility + */ +@@ -4977,7 +4977,7 @@ glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx) + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) + goto out; +- if (conf->op_version < GD_OP_VERSION_3_7_0 && ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0_4 && + volinfo->transport_type == GF_TRANSPORT_RDMA) { + ret = glusterd_op_modify_port_key(op_ctx, brick_index_max); + if (ret) +@@ -5576,7 +5576,7 @@ glusterd_op_txn_complete(uuid_t *txn_id) + glusterd_op_clear_errstr(); + + /* Based on the op-version, we release the cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) { + ret = glusterd_unlock(MY_UUID); + /* unlock can't/shouldn't fail here!! 
*/ + if (ret) +diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +index 5b5959e..f24c86e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +@@ -547,7 +547,7 @@ out: + * @prefix. All the parameters are compulsory. + * + * The complete address list is added to the dict only if the cluster op-version +- * is >= GD_OP_VERSION_3_6_0 ++ * is >= GD_OP_VERSION_3_7_0 + */ + int + gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict, +@@ -593,7 +593,7 @@ gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict, + goto out; + } + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_7_0) { + ret = 0; + goto out; + } +@@ -778,7 +778,7 @@ gd_update_peerinfo_from_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict, + GF_FREE(peerinfo->hostname); + peerinfo->hostname = gf_strdup(hostname); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_7_0) { + ret = 0; + goto out; + } +@@ -894,7 +894,7 @@ gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict, + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_7_0) { + ret = 0; + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index 34b0294..6365b6e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -792,13 +792,13 @@ glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr) + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + /* Check if the connected clients are all of version +- * glusterfs-3.6 and higher. This is needed to prevent some data ++ * RHS-2.1u5 and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. 
This check can be bypassed by using + * 'force' + */ + ret = glusterd_check_client_op_version_support( +- volname, GD_OP_VERSION_3_6_0, NULL); ++ volname, GD_OP_VERSION_RHS_2_1_5, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " +diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +index ca1de1a..0615081 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c ++++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +@@ -297,7 +297,7 @@ glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr, + if (ret) + goto out; + +- } else if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ } else if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +@@ -396,7 +396,7 @@ glusterd_op_perform_replace_brick(glusterd_volinfo_t *volinfo, char *old_brick, + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + ret = dict_get_strn(dict, "brick1.mount_dir", SLEN("brick1.mount_dir"), + &brick_mount_dir); + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +index 728781d..4ec9700 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +@@ -288,11 +288,11 @@ __glusterd_probe_cbk(struct rpc_req *req, struct iovec *iov, int count, + * we need to add the new hostname to the peer. + * + * This addition should only be done for cluster op-version >= +- * GD_OP_VERSION_3_6_0 as address lists are only supported from then on. ++ * GD_OP_VERSION_3_7_0 as address lists are only supported from then on. + * Also, this update should only be done when an explicit CLI probe + * command was used to begin the probe process. 
+ */ +- if ((conf->op_version >= GD_OP_VERSION_3_6_0) && ++ if ((conf->op_version >= GD_OP_VERSION_3_7_0) && + (gf_uuid_compare(rsp.uuid, peerinfo->uuid) == 0)) { + ctx = ((call_frame_t *)myframe)->local; + /* Presence of ctx->req implies this probe was started by a cli +@@ -1544,7 +1544,7 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) + goto out; + } + +- if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + ret = glusterd_add_missed_snaps_to_export_dict(peer_data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 73a11a3..54a7bd1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -955,7 +955,7 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx) + + /* Compare missed_snapshot list with the peer * + * if volume comparison is successful */ +- if ((op_ret == 0) && (conf->op_version >= GD_OP_VERSION_3_6_0)) { ++ if ((op_ret == 0) && (conf->op_version >= GD_OP_VERSION_RHS_3_0)) { + ret = glusterd_import_friend_missed_snap_list(ev_ctx->vols); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index 1ece374..2958443 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -470,7 +470,7 @@ gd_add_brick_snap_details_to_dict(dict_t *dict, char *prefix, + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -547,7 +547,7 @@ gd_add_vol_snap_details_to_dict(dict_t *dict, char *prefix, + GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -846,7 +846,7 @@ gd_import_new_brick_snap_details(dict_t *dict, char *prefix, + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -903,8 +903,8 @@ out: + * Imports the snapshot details of a volume if required and available + * + * Snapshot details will be imported only if cluster.op_version is greater than +- * or equal to GD_OP_VERSION_3_6_0, the op-version from which volume snapshot is +- * supported. ++ * or equal to GD_OP_VERSION_RHS_3_0, the op-version from which volume snapshot ++ * is supported. 
+ */ + int + gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo, +@@ -928,7 +928,7 @@ gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo, + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (volname != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index 8f5cd6d..c56be91 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -9345,14 +9345,14 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req) + goto out; + } + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + snprintf(err_str, sizeof(err_str), + "Cluster operating version" + " is lesser than the supported version " + "for a snapshot"); + op_errno = EG_OPNOTSUP; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, +- "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_3_6_0); ++ "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 7acea05..64447e7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -313,7 +313,7 @@ gd_store_brick_snap_details_write(int fd, glusterd_brickinfo_t *brickinfo) + GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -813,7 +813,7 @@ glusterd_volume_write_snap_details(int fd, glusterd_volinfo_t *volinfo) + GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out); + GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -967,7 +967,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo) + goto out; + } + +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + snprintf(buf, sizeof(buf), "%d", volinfo->disperse_count); + ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, buf); + if (ret) +@@ -2502,7 +2502,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo) + conf = THIS->private; + GF_ASSERT(volinfo); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -2510,15 +2510,16 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo) + /* + * This is needed for upgrade situations. Say a volume is created with + * older version of glusterfs and upgraded to a glusterfs version equal +- * to or greater than GD_OP_VERSION_3_6_0. The older glusterd would not +- * have created the snapd.info file related to snapshot daemon for user +- * serviceable snapshots. So as part of upgrade when the new glusterd +- * starts, as part of restore (restoring the volume to be precise), it +- * tries to snapd related info from snapd.info file. But since there was +- * no such file till now, the restore operation fails. 
Thus, to prevent
+- * it from happening check whether user serviceable snapshots features
+- * is enabled before restoring snapd. If its disabled, then simply
+- * exit by returning success (without even checking for the snapd.info).
++ * to or greater than GD_OP_VERSION_RHS_3_0. The older glusterd would
++ * not have created the snapd.info file related to snapshot daemon for
++ * user serviceable snapshots. So as part of upgrade when the new
++ * glusterd starts, as part of restore (restoring the volume to be
++ * precise), it tries to snapd related info from snapd.info file. But
++ * since there was no such file till now, the restore operation fails.
++ * Thus, to prevent it from happening check whether user serviceable
++ * snapshots features is enabled before restoring snapd. If its
++ * disabled, then simply exit by returning success (without even
++ * checking for the snapd.info).
+ */
+
+ if (!dict_get_str_boolean(volinfo->dict, "features.uss", _gf_false)) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 45b221c..1741cf8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1827,7 +1827,7 @@ gd_sync_task_begin(dict_t *op_ctx, rpcsvc_request_t *req)
+ goto out;
+ }
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0)
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0)
+ cluster_lock = _gf_true;
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c
+index dd86cf5..4dc0d44 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-tier.c
++++ b/xlators/mgmt/glusterd/src/glusterd-tier.c
+@@ -867,7 +867,8 @@ glusterd_op_stage_tier(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ * when rebalance is run.
This check can be bypassed by using + * 'force' + */ +- ret = glusterd_check_client_op_version_support(volname, GD_OP_VERSION_3_6_0, ++ ret = glusterd_check_client_op_version_support(volname, ++ GD_OP_VERSION_RHS_3_0, + NULL); + if (ret) { + ret = gf_asprintf(op_errstr, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 8bbd795..52b83ec 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -12226,10 +12226,10 @@ gd_update_volume_op_versions(glusterd_volinfo_t *volinfo) + } + + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { +- if (volinfo->op_version < GD_OP_VERSION_3_6_0) +- volinfo->op_version = GD_OP_VERSION_3_6_0; +- if (volinfo->client_op_version < GD_OP_VERSION_3_6_0) +- volinfo->client_op_version = GD_OP_VERSION_3_6_0; ++ if (volinfo->op_version < GD_OP_VERSION_3_7_0) ++ volinfo->op_version = GD_OP_VERSION_3_7_0; ++ if (volinfo->client_op_version < GD_OP_VERSION_3_7_0) ++ volinfo->client_op_version = GD_OP_VERSION_3_7_0; + } + + return; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 7cfba3d..86ef470 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1389,7 +1389,7 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + ret = glusterd_get_brick_mount_dir(brick_info->path, + brick_info->hostname, + brick_info->mount_dir); +@@ -1698,7 +1698,7 @@ glusterd_op_stage_start_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + if (strlen(brickinfo->mount_dir) < 1) { + ret = glusterd_get_brick_mount_dir( + brickinfo->path, brickinfo->hostname, brickinfo->mount_dir); +@@ -2395,10 +2395,10 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr) + volname); + goto out; + } +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_3_7_0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "Disperse volume " +- "needs op-version 3.6.0 or higher"); ++ "needs op-version 30700 or higher"); + ret = -1; + goto out; + } +@@ -2494,7 +2494,7 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr) + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + brick_mount_dir = NULL; + ret = snprintf(key, sizeof(key), "brick%d.mount_dir", i); + ret = dict_get_strn(dict, key, ret, &brick_mount_dir); +@@ -2703,7 +2703,7 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr) + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brick_count++; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index dc58e11..d07fc10 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -807,7 +807,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "cluster.subvols-per-directory", + .voltype = "cluster/distribute", + .option = "directory-layout-spread", +- .op_version = 2, ++ .op_version = 1, + .validate_fn = validate_subvols_per_directory, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.readdir-optimize", +@@ -817,25 +817,25 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "cluster.rsync-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.extra-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.dht-xattr-name", + .voltype = "cluster/distribute", + .option = "xattr-name", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "cluster.randomize-hash-range-by-gfid", + .voltype = "cluster/distribute", + .option = "randomize-hash-range-by-gfid", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + }, + { +@@ -877,12 +877,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "cluster/nufa", + .option = "local-volume-name", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "cluster.weighted-rebalance", + .voltype = "cluster/distribute", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + }, + + /* Switch xlator options (Distribute special case) */ +@@ -890,13 +890,13 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "cluster/distribute", + .option = "!switch", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.switch-pattern", + .voltype = "cluster/switch", + .option = "pattern.switch.case", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + /* AFR xlator options */ +@@ -1014,16 +1014,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.readdir-failover", + .voltype = "cluster/replicate", +- .op_version = 2, ++ .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.ensure-durability", + .voltype = "cluster/replicate", +- .op_version = 3, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.consistent-metadata", + .voltype = "cluster/replicate", + .type = DOC, +- .op_version = GD_OP_VERSION_3_7_0, ++ .op_version = GD_OP_VERSION_RHS_3_0_4, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.heal-wait-queue-length", + .voltype = "cluster/replicate", +@@ -1080,45 +1080,45 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = "diagnostics.brick-logger", + .voltype = "debug/io-stats", + .option = "!logger", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "diagnostics.client-logger", + .voltype = "debug/io-stats", + .option = "!logger", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-format", + .voltype = "debug/io-stats", + .option = "!log-format", +- .op_version = 
GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "diagnostics.client-log-format", + .voltype = "debug/io-stats", + .option = "!log-format", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-buf-size", + .voltype = "debug/io-stats", + .option = "!log-buf-size", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "diagnostics.client-log-buf-size", + .voltype = "debug/io-stats", + .option = "!log-buf-size", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-flush-timeout", + .voltype = "debug/io-stats", + .option = "!log-flush-timeout", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "diagnostics.client-log-flush-timeout", + .voltype = "debug/io-stats", + .option = "!log-flush-timeout", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "diagnostics.stats-dump-interval", + .voltype = "debug/io-stats", +@@ -1203,6 +1203,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "performance/io-threads", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0}, ++ {.key = "performance.least-rate-limit", ++ .voltype = "performance/io-threads", ++ .op_version = 1 ++ }, + + /* Other perf xlators' options */ + {.key = "performance.io-cache-pass-through", +@@ -1237,12 +1241,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.nfs.flush-behind", + .voltype = "performance/write-behind", + .option = "flush-behind", +- .op_version = 1, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.write-behind-window-size", + .voltype = "performance/write-behind", + .option = "cache-size", +- .op_version = 1, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.resync-failed-syncs-after-fsync", +@@ -1262,27 +1266,27 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.nfs.write-behind-window-size", + .voltype = "performance/write-behind", + .option = "cache-size", +- .op_version = 1, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.strict-o-direct", + .voltype = "performance/write-behind", + .option = "strict-O_DIRECT", +- .op_version = 2, ++ .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.nfs.strict-o-direct", + .voltype = "performance/write-behind", + .option = "strict-O_DIRECT", +- .op_version = 2, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.strict-write-ordering", + .voltype = "performance/write-behind", + .option = "strict-write-ordering", +- .op_version = 2, ++ .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.nfs.strict-write-ordering", + .voltype = "performance/write-behind", + .option = "strict-write-ordering", +- .op_version = 2, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.write-behind-trickling-writes", + .voltype = "performance/write-behind", +@@ -1302,12 +1306,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.lazy-open", + .voltype = "performance/open-behind", + .option = "lazy-open", +- .op_version = 3, ++ .op_version = 2, + 
.flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.read-after-open", + .voltype = "performance/open-behind", + .option = "read-after-open", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.open-behind-pass-through", +@@ -1389,22 +1393,22 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "encryption/crypt", + .option = "!feat", + .value = "off", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .description = "enable/disable client-side encryption for " + "the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + + {.key = "encryption.master-key", + .voltype = "encryption/crypt", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "encryption.data-key-size", + .voltype = "encryption/crypt", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "encryption.block-size", + .voltype = "encryption/crypt", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + /* Client xlator options */ +@@ -1431,7 +1435,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "network.remote-dio", + .voltype = "protocol/client", + .option = "filter-O_DIRECT", +- .op_version = 2, ++ .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "client.own-thread", +@@ -1443,7 +1447,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + { + .key = "client.event-threads", + .voltype = "protocol/client", +- .op_version = GD_OP_VERSION_3_7_0, ++ .op_version = GD_OP_VERSION_RHS_3_0_4, + }, + {.key = "client.tcp-user-timeout", + .voltype = "protocol/client", +@@ -1501,7 +1505,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "server.root-squash", + .voltype = "protocol/server", + .option = "root-squash", +- .op_version = 2}, ++ .op_version = 1}, + {.key = "server.all-squash", + .voltype = "protocol/server", + .option = "all-squash", +@@ -1509,11 +1513,11 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "server.anonuid", + .voltype = "protocol/server", + .option = "anonuid", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "server.anongid", + .voltype = "protocol/server", + .option = "anongid", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "server.statedump-path", + .voltype = "protocol/server", + .option = "statedump-path", +@@ -1522,7 +1526,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "protocol/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "server.ssl", + .voltype = "protocol/server", + .value = "off", +@@ -1540,12 +1544,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "the clients that are allowed to access the server." 
+ "By default, all TLS authenticated clients are " + "allowed to access the server.", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "server.manage-gids", + .voltype = "protocol/server", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "server.dynamic-auth", +@@ -1556,12 +1560,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = "client.send-gids", + .voltype = "protocol/client", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "server.gid-timeout", + .voltype = "protocol/server", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "server.own-thread", +@@ -1573,7 +1577,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + { + .key = "server.event-threads", + .voltype = "protocol/server", +- .op_version = GD_OP_VERSION_3_7_0, ++ .op_version = GD_OP_VERSION_RHS_3_0_4, + }, + { + .key = "server.tcp-user-timeout", +@@ -1643,13 +1647,13 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = SSL_CERT_DEPTH_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-cert-depth", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = SSL_CIPHER_LIST_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-cipher-list", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = SSL_DH_PARAM_OPT, +@@ -1690,8 +1694,8 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.readdir-ahead", + .voltype = "performance/readdir-ahead", + .option = "!perf", +- .value = "on", +- .op_version = 3, ++ .value = "off", ++ .op_version = GD_OP_VERSION_RHS_3_0, + .description = "enable/disable readdir-ahead translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.io-cache", +@@ -1804,7 +1808,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + /* Feature translators */ + {.key = "features.uss", + .voltype = "features/snapview-server", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .value = "off", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .validate_fn = validate_uss, +@@ -1813,7 +1817,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + + {.key = "features.snapshot-directory", + .voltype = "features/snapview-client", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .value = ".snaps", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .validate_fn = validate_uss_dir, +@@ -1823,7 +1827,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + + {.key = "features.show-snapshot-directory", + .voltype = "features/snapview-client", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .value = "off", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .description = "show entry point in readdir output of " +@@ -1847,30 +1851,30 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/cdc", + .option = "!feat", + .value = "off", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .description = "enable/disable network compression translator", + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "network.compression.window-size", + .voltype = "features/cdc", + .option = "window-size", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "network.compression.mem-level", + .voltype 
= "features/cdc", + .option = "mem-level", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "network.compression.min-size", + .voltype = "features/cdc", + .option = "min-size", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "network.compression.compression-level", + .voltype = "features/cdc", + .option = "compression-level", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "network.compression.debug", + .voltype = "features/cdc", + .option = "debug", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + #endif + + /* Quota xlator options */ +@@ -1886,28 +1890,28 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/quota", + .option = "default-soft-limit", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + }, + { + .key = "features.soft-timeout", + .voltype = "features/quota", + .option = "soft-timeout", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + }, + { + .key = "features.hard-timeout", + .voltype = "features/quota", + .option = "hard-timeout", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + }, + { + .key = "features.alert-time", + .voltype = "features/quota", + .option = "alert-time", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + }, + { + .key = "features.quota-deem-statfs", +@@ -2009,22 +2013,22 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "debug/error-gen", + .option = "failure", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "debug.error-number", + .voltype = "debug/error-gen", + .option = "error-no", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "debug.random-failure", + .voltype = "debug/error-gen", + .option = "random-failure", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "debug.error-fops", + .voltype = "debug/error-gen", + .option = "enable", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + + /* NFS xlator options */ + {.key = "nfs.enable-ino32", +@@ -2066,7 +2070,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "nfs/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "nfs.port", + .voltype = "nfs/server", + .option = "nfs.port", +@@ -2128,7 +2132,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "nfs/server", + .option = "nfs.acl", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "nfs.mount-udp", + .voltype = "nfs/server", + .option = "nfs.mount-udp", +@@ -2144,14 +2148,14 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "nfs/server", + .option = "nfs.rpc-statd", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "nfs.log-level", + .voltype = "nfs/server", + .option = "nfs.log-level", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "nfs.server-aux-gids", + .voltype = "nfs/server", +@@ -2162,27 +2166,27 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "nfs/server", + .option = "nfs.drc", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 1}, + {.key = "nfs.drc-size", + .voltype = "nfs/server", + .option = "nfs.drc-size", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 1}, + {.key = "nfs.read-size", + .voltype = "nfs/server", + .option = "nfs3.read-size", + .type = GLOBAL_DOC, +- .op_version = 3}, 
++ .op_version = 2}, + {.key = "nfs.write-size", + .voltype = "nfs/server", + .option = "nfs3.write-size", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "nfs.readdir-size", + .voltype = "nfs/server", + .option = "nfs3.readdir-size", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "nfs.rdirplus", + .voltype = "nfs/server", + .option = "nfs.rdirplus", +@@ -2219,7 +2223,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "features.read-only", + .voltype = "features/read-only", + .option = "read-only", +- .op_version = 1, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "features.worm", + .voltype = "features/worm", +@@ -2266,14 +2270,14 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "storage.linux-aio", .voltype = "storage/posix", .op_version = 1}, + {.key = "storage.batch-fsync-mode", + .voltype = "storage/posix", +- .op_version = 3}, ++ .op_version = 2}, + {.key = "storage.batch-fsync-delay-usec", + .voltype = "storage/posix", +- .op_version = 3}, ++ .op_version = 2}, + { + .key = "storage.xattr-user-namespace-mode", + .voltype = "storage/posix", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "storage.owner-uid", + .voltype = "storage/posix", +@@ -2285,15 +2289,15 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = 1}, + {.key = "storage.node-uuid-pathinfo", + .voltype = "storage/posix", +- .op_version = 3}, ++ .op_version = 2}, + {.key = "storage.health-check-interval", + .voltype = "storage/posix", +- .op_version = 3}, ++ .op_version = 2}, + { + .option = "update-link-count-parent", + .key = "storage.build-pgfid", + .voltype = "storage/posix", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .option = "gfid2path", +@@ -2363,7 +2367,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_1_0, + }, +- {.key = "storage.bd-aio", .voltype = "storage/bd", .op_version = 3}, ++ {.key = "storage.bd-aio", ++ .voltype = "storage/bd", ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "config.memory-accounting", + .voltype = "mgmt/glusterd", + .option = "!config", +@@ -2385,37 +2391,37 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = GLUSTERD_QUORUM_TYPE_KEY, + .voltype = "mgmt/glusterd", + .value = "off", +- .op_version = 2}, ++ .op_version = 1}, + {.key = GLUSTERD_QUORUM_RATIO_KEY, + .voltype = "mgmt/glusterd", + .value = "0", +- .op_version = 2}, ++ .op_version = 1}, + /* changelog translator - global tunables */ + {.key = "changelog.changelog", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "changelog.changelog-dir", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "changelog.encoding", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "changelog.rollover-time", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "changelog.fsync-interval", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + { + .key = "changelog.changelog-barrier-timeout", + .voltype = "features/changelog", + .value = BARRIER_TIMEOUT, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "changelog.capture-del-path", + .voltype = 
"features/changelog", +@@ -2426,18 +2432,18 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/barrier", + .value = "disable", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_7_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "features.barrier-timeout", + .voltype = "features/barrier", + .value = BARRIER_TIMEOUT, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = GLUSTERD_GLOBAL_OP_VERSION_KEY, + .voltype = "mgmt/glusterd", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = GLUSTERD_MAX_OP_VERSION_KEY, +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index c8e84f6..dea6c28 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -3002,7 +3002,7 @@ struct volume_options options[] = { + {.key = {"send-gids"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE}, + {.key = {"event-threads"}, + .type = GF_OPTION_TYPE_INT, +@@ -3013,7 +3013,7 @@ struct volume_options options[] = { + "in parallel. Larger values would help process" + " responses faster, depending on available processing" + " power. Range 1-32 threads.", +- .op_version = {GD_OP_VERSION_3_7_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {NULL}}, + }; +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index b4b447b..6ae63ba 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -1854,13 +1854,13 @@ struct volume_options server_options[] = { + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Resolve groups on the server-side.", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"gid-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "300", + .description = "Timeout in seconds for the cached groups to expire.", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"event-threads"}, + .type = GF_OPTION_TYPE_INT, +@@ -1871,7 +1871,7 @@ struct volume_options server_options[] = { + "in parallel. Larger values would help process" + " responses faster, depending on available processing" + " power.", +- .op_version = {GD_OP_VERSION_3_7_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"dynamic-auth"}, + .type = GF_OPTION_TYPE_BOOL, +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index f0d8e3f..ed82e35 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -1243,7 +1243,7 @@ struct volume_options posix_options[] = { + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enable placeholders for gfid to path conversion", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"gfid2path"}, + .type = GF_OPTION_TYPE_BOOL, +@@ -1279,7 +1279,7 @@ struct volume_options posix_options[] = { + " The raw filesystem will not be compatible with OS X Finder.\n" + "\t- Strip: Will strip the user namespace before setting. 
The raw "
+                   "filesystem will work in OS X.\n",
+-    .op_version = {GD_OP_VERSION_3_6_0},
++    .op_version = {GD_OP_VERSION_RHS_3_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ #endif
+     {
+--
+1.8.3.1
+
diff --git a/SOURCES/0003-rpc-set-bind-insecure-to-off-by-default.patch b/SOURCES/0003-rpc-set-bind-insecure-to-off-by-default.patch
new file mode 100644
index 0000000..639b62f
--- /dev/null
+++ b/SOURCES/0003-rpc-set-bind-insecure-to-off-by-default.patch
@@ -0,0 +1,51 @@
+From 9b58731c83bc1ee9c5f2a3cd58a8f845cf09ee82 Mon Sep 17 00:00:00 2001
+From: Prasanna Kumar Kalever
+Date: Mon, 21 Mar 2016 13:54:19 +0530
+Subject: [PATCH 03/52] rpc: set bind-insecure to off by default
+
+commit 243a5b429f225acb8e7132264fe0a0835ff013d5 turns 'ON'
+allow-insecure and bind-insecure by default.
+
+Problem:
+Now with newer versions we have bind-insecure 'ON' by default.
+So, while upgrading a subset of nodes from a trusted storage pool,
+nodes which have older versions of glusterfs will expect
+connections from secure ports only (since they still have
+bind-insecure off) and thus reject connections from upgraded
+nodes, which now use insecure ports.
+
+Hence we will run into connection issues between peers.
+
+Solution:
+This patch turns bind-insecure 'OFF' by default to avoid the
+problem explained above.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Id7a19b4872399d3b019243b0857c9c7af75472f7
+Signed-off-by: Prasanna Kumar Kalever
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70313
+Reviewed-by: Atin Mukherjee
+Tested-by: Atin Mukherjee
+---
+ rpc/rpc-lib/src/rpc-transport.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
+index f9cbdf1..4beaaf9 100644
+--- a/rpc/rpc-lib/src/rpc-transport.c
++++ b/rpc/rpc-lib/src/rpc-transport.c
+@@ -269,8 +269,8 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
+         else
+             trans->bind_insecure = 0;
+     } else {
+-        /* By default allow bind insecure */
+-        trans->bind_insecure = 1;
++        /* Turning off bind insecure by default*/
++        trans->bind_insecure = 0;
+     }
+ 
+     ret = dict_get_str(options, "transport-type", &type);
+--
+1.8.3.1
+
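For readers tracking the behavioural change above: with bind-insecure off, outgoing RPC connections from gluster daemons are again bound to privileged source ports, which is what pre-upgrade peers expect. A rough way to observe this on a running node (illustrative commands only; process names and port numbers vary per system):

    # List established gluster connections and their local ports; with
    # bind-insecure off, the local (source) ports of connections to peers
    # should be below 1024.
    ss -tnp | grep -i gluster
    # Older, stricter peers accept connections only from such privileged
    # ports, so mixed-version pools keep working during a rolling upgrade.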
diff --git a/SOURCES/0004-glusterd-spec-fixing-autogen-issue.patch b/SOURCES/0004-glusterd-spec-fixing-autogen-issue.patch
new file mode 100644
index 0000000..f3cb2ec
--- /dev/null
+++ b/SOURCES/0004-glusterd-spec-fixing-autogen-issue.patch
@@ -0,0 +1,47 @@
+From aa73240892a7072be68772370fd95173e6e77d10 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee
+Date: Mon, 21 Mar 2016 17:07:00 +0530
+Subject: [PATCH 04/52] glusterd/spec: fixing autogen issue
+
+Backport of https://code.engineering.redhat.com/gerrit/#/c/59463/
+
+Because of the incorrect build section, autogen.sh wasn't re-run during the rpm
+build process. The `extras/Makefile.in` was not regenerated with the changes
+made to `extras/Makefile.am` in the firewalld patch. This meant that
+`extras/Makefile` was generated without the firewalld changes. So the firewalld
+config wasn't installed during `make install`, and rpmbuild later failed when it
+could not find `/usr/lib/firewalld/glusterfs.xml`.
+
+Label: DOWNSTREAM ONLY
+
+>Reviewed-on: https://code.engineering.redhat.com/gerrit/59463
+
+Change-Id: I498bcceeacbd839640282eb6467c9f1464505697
+Signed-off-by: Atin Mukherjee
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70343
+Reviewed-by: Milind Changire
+---
+ glusterfs.spec.in | 7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index c655f16..f5c1f79 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -700,12 +700,7 @@ done
+ 
+ %build
+ 
+-# RHEL6 and earlier need to manually replace config.guess and config.sub
+-%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+-./autogen.sh
+-%endif
+-
+-%configure \
++./autogen.sh && %configure \
+     %{?_with_asan} \
+     %{?_with_cmocka} \
+     %{?_with_debug} \
+--
+1.8.3.1
+
diff --git a/SOURCES/0005-libglusterfs-glusterd-Fix-compilation-errors.patch b/SOURCES/0005-libglusterfs-glusterd-Fix-compilation-errors.patch
new file mode 100644
index 0000000..5aa4f20
--- /dev/null
+++ b/SOURCES/0005-libglusterfs-glusterd-Fix-compilation-errors.patch
@@ -0,0 +1,36 @@
+From 44f758a56c5c5ad340ebc6d6a6478e8712c2c101 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee
+Date: Mon, 21 Mar 2016 22:31:02 +0530
+Subject: [PATCH 05/52] libglusterfs/glusterd: Fix compilation errors
+
+1. Removed duplicate definition of GD_OP_VER_PERSISTENT_AFR_XATTRS introduced in
+d367a88 where GD_OP_VER_PERSISTENT_AFR_XATTRS was redefined
+
+2. Fixed incorrect op-version
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Icfa3206e8a41a11875641f57523732b80837f8f6
+Signed-off-by: Atin Mukherjee
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70384
+Reviewed-by: Nithya Balachandran
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 64447e7..51ca3d1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -967,7 +967,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+         goto out;
+     }
+ 
+-    if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
++    if (conf->op_version >= GD_OP_VERSION_3_7_0) {
+         snprintf(buf, sizeof(buf), "%d", volinfo->disperse_count);
+         ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, buf);
+         if (ret)
+--
+1.8.3.1
+
diff --git a/SOURCES/0006-build-remove-ghost-directory-entries.patch b/SOURCES/0006-build-remove-ghost-directory-entries.patch
new file mode 100644
index 0000000..68dd8f3
--- /dev/null
+++ b/SOURCES/0006-build-remove-ghost-directory-entries.patch
@@ -0,0 +1,58 @@
+From 1f28e008825ae291208a9e6c714dd642f715a2a1 Mon Sep 17 00:00:00 2001
+From: "Bala.FA"
+Date: Mon, 7 Apr 2014 15:24:10 +0530
+Subject: [PATCH 06/52] build: remove ghost directory entries
+
+ovirt requires hook directories for gluster management, and the
+ghost directories are no longer ghost entries
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Iaf1066ba0655619024f87eaaa039f0010578c567
+Signed-off-by: Bala.FA
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60133
+Tested-by: Milind Changire
+---
+ glusterfs.spec.in | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index f5c1f79..6be492e 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -799,15 +799,30 @@
install -D -p -m 0644 extras/glusterfs-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs + + %if ( 0%{!?_without_georeplication:1} ) +-# geo-rep ghosts + mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication + touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf + install -D -p -m 0644 extras/glusterfs-georep-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep + %endif + ++%if ( 0%{!?_without_syslog:1} ) ++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) ++install -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \ ++ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example ++%endif ++ ++%if ( 0%{?rhel} && 0%{?rhel} == 6 ) ++install -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \ ++ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example ++%endif ++ ++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 ) ++install -D -p -m 0644 extras/logger.conf.example \ ++ %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example ++%endif ++%endif ++ + %if ( 0%{!?_without_server:1} ) +-# the rest of the ghosts + touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info + touch %{buildroot}%{_sharedstatedir}/glusterd/options + subdirs=(add-brick create copy-file delete gsync-create remove-brick reset set start stop) +-- +1.8.3.1 + diff --git a/SOURCES/0007-build-add-RHGS-specific-changes.patch b/SOURCES/0007-build-add-RHGS-specific-changes.patch new file mode 100644 index 0000000..ac092bd --- /dev/null +++ b/SOURCES/0007-build-add-RHGS-specific-changes.patch @@ -0,0 +1,620 @@ +From 7744475550cd27f58f536741e9c50c639d3b02d8 Mon Sep 17 00:00:00 2001 +From: "Bala.FA" +Date: Thu, 6 Dec 2018 20:06:27 +0530 +Subject: [PATCH 07/52] build: add RHGS specific changes + +Label: DOWNSTREAM ONLY + +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1074947 +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1097782 +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1115267 +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1221743 +Change-Id: I08333334745adf2350e772c6454ffcfe9c08cb89 +Reviewed-on: https://code.engineering.redhat.com/gerrit/24983 +Reviewed-on: https://code.engineering.redhat.com/gerrit/25451 +Reviewed-on: https://code.engineering.redhat.com/gerrit/25518 +Reviewed-on: https://code.engineering.redhat.com/gerrit/25983 +Signed-off-by: Bala.FA +Reviewed-on: https://code.engineering.redhat.com/gerrit/60134 +Tested-by: Milind Changire +--- + glusterfs.spec.in | 485 +++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 481 insertions(+), 4 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 6be492e..eb04491 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -95,9 +95,16 @@ + %{?_without_server:%global _without_server --without-server} + + # disable server components forcefully as rhel <= 6 +-%if ( 0%{?rhel} && 0%{?rhel} <= 6 ) ++%if ( 0%{?rhel} ) ++%if ( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ) ++%global _without_server %{nil} ++%else + %global _without_server --without-server + %endif ++%endif ++ ++%global _without_extra_xlators 1 ++%global _without_regression_tests 1 + + # syslog + # if you wish to build rpms without syslog logging, compile like this +@@ -229,7 +236,8 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} + %else + Name: @PACKAGE_NAME@ + Version: @PACKAGE_VERSION@ +-Release: 0.@PACKAGE_RELEASE@%{?dist} ++Release: @PACKAGE_RELEASE@%{?dist} ++ExcludeArch: i686 + %endif + License: GPLv2 or LGPLv3+ + URL: 
http://docs.gluster.org/ +@@ -243,8 +251,6 @@ Source8: glusterfsd.init + Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz + %endif + +-BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) +- + Requires(pre): shadow-utils + %if ( 0%{?_with_systemd:1} ) + BuildRequires: systemd +@@ -384,7 +390,9 @@ This package provides cloudsync plugins for archival feature. + Summary: Development Libraries + Requires: %{name}%{?_isa} = %{version}-%{release} + # Needed for the Glupy examples to work ++%if ( 0%{!?_without_extra_xlators:1} ) + Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release} ++%endif + + %description devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -397,6 +405,7 @@ is in user space and easily manageable. + + This package provides the development libraries and include files. + ++%if ( 0%{!?_without_extra_xlators:1} ) + %package extra-xlators + Summary: Extra Gluster filesystem Translators + # We need python-gluster rpm for gluster module's __init__.py in Python +@@ -415,6 +424,7 @@ is in user space and easily manageable. + + This package provides extra filesystem Translators, such as Glupy, + for GlusterFS. ++%endif + + %package fuse + Summary: Fuse client +@@ -440,6 +450,30 @@ is in user space and easily manageable. + This package provides support to FUSE based clients and inlcudes the + glusterfs(d) binary. + ++%if ( 0%{!?_without_server:1} ) ++%package ganesha ++Summary: NFS-Ganesha configuration ++Group: Applications/File ++ ++Requires: %{name}-server%{?_isa} = %{version}-%{release} ++Requires: nfs-ganesha-gluster, pcs, dbus ++%if ( 0%{?rhel} && 0%{?rhel} == 6 ) ++Requires: cman, pacemaker, corosync ++%endif ++ ++%description ganesha ++GlusterFS is a distributed file-system capable of scaling to several ++petabytes. It aggregates various storage bricks over Infiniband RDMA ++or TCP/IP interconnect into one large parallel network file ++system. GlusterFS is one of the most sophisticated file systems in ++terms of features and extensibility. It borrows a powerful concept ++called Translators from GNU Hurd kernel. Much of the code in GlusterFS ++is in user space and easily manageable. ++ ++This package provides the configuration and related files for using ++NFS-Ganesha as the NFS server using GlusterFS ++%endif ++ + %if ( 0%{!?_without_georeplication:1} ) + %package geo-replication + Summary: GlusterFS Geo-replication +@@ -541,6 +575,7 @@ is in user space and easily manageable. + This package provides support to ib-verbs library. + %endif + ++%if ( 0%{!?_without_regression_tests:1} ) + %package regression-tests + Summary: Development Tools + Requires: %{name}%{?_isa} = %{version}-%{release} +@@ -556,6 +591,7 @@ Requires: nfs-utils xfsprogs yajl psmisc bc + %description regression-tests + The Gluster Test Framework, is a suite of scripts used for + regression testing of Gluster. 
++%endif + + %if ( 0%{!?_without_ocf:1} ) + %package resource-agents +@@ -1092,6 +1128,16 @@ exit 0 + %if 0%{?_tmpfilesdir:1} && 0%{!?_without_server:1} + %{_tmpfilesdir}/gluster.conf + %endif ++%if ( 0%{?_without_extra_xlators:1} ) ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache.so ++%endif ++%if ( 0%{?_without_regression_tests:1} ) ++%exclude %{_datadir}/glusterfs/run-tests.sh ++%exclude %{_datadir}/glusterfs/tests ++%endif + + %files api + %exclude %{_libdir}/*.so +@@ -1134,12 +1180,14 @@ exit 0 + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/client.so + ++%if ( 0%{!?_without_extra_xlators:1} ) + %files extra-xlators + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so ++%endif + + %files fuse + # glusterfs is a symlink to glusterfsd, -server depends on -fuse. +@@ -1239,11 +1287,13 @@ exit 0 + %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma* + %endif + ++%if ( 0%{!?_without_regression_tests:1} ) + %files regression-tests + %dir %{_datadir}/glusterfs + %{_datadir}/glusterfs/run-tests.sh + %{_datadir}/glusterfs/tests + %exclude %{_datadir}/glusterfs/tests/vagrant ++%endif + + %if ( 0%{!?_without_ocf:1} ) + %files resource-agents +@@ -1424,6 +1474,433 @@ exit 0 + %endif + %endif + ++##----------------------------------------------------------------------------- ++## All %pretrans should be placed here and keep them sorted ++## ++%if 0%{!?_without_server:1} ++%pretrans -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ echo "ERROR: Distribute volumes detected. In-service rolling upgrade requires distribute volume(s) to be stopped." ++ echo "ERROR: Please stop distribute volume(s) before proceeding... exiting!" ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ echo "WARNING: Updating glusterfs requires its processes to be killed. This action does NOT incur downtime." ++ echo "WARNING: Ensure to wait for the upgraded server to finish healing before proceeding." ++ echo "WARNING: Refer upgrade section of install guide for more details" ++ echo "Please run # service glusterd stop; pkill glusterfs; pkill glusterfsd; pkill gsyncd.py;" ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs_pretrans_" .. 
os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%pretrans api -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-api_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%pretrans api-devel -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-api-devel_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%pretrans devel -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-devel_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. 
tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%pretrans fuse -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-fuse_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%if 0%{?_can_georeplicate} ++%if ( 0%{!?_without_georeplication:1} ) ++%pretrans geo-replication -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-geo-replication_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++%endif ++%endif ++ ++ ++ ++%pretrans libs -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-libs_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. 
tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%if ( 0%{!?_without_rdma:1} ) ++%pretrans rdma -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-rdma_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++%endif ++ ++ ++ ++%if ( 0%{!?_without_ocf:1} ) ++%pretrans resource-agents -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-resource-agents_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++%endif ++ ++ ++ ++%pretrans server -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-server_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. 
tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++   error("Detected running glusterfs processes", ok)
++end
++
++
++
++%if ( 0%{!?_without_rdma:1} )
++%pretrans rdma -p <lua>
++if not posix.access("/bin/bash", "x") then
++    -- initial installation, no shell, no running glusterfsd
++    return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++   pushd . > /dev/null 2>&1
++   for volume in /var/lib/glusterd/vols/*; do cd $volume;
++       vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++       volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++       if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++          exit 1;
++       fi
++   done
++
++   popd > /dev/null 2>&1
++   exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-rdma_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++   error("Detected running glusterfs processes", ok)
++end
++%endif
++
++
++
++%if ( 0%{!?_without_ocf:1} )
++%pretrans resource-agents -p <lua>
++if not posix.access("/bin/bash", "x") then
++    -- initial installation, no shell, no running glusterfsd
++    return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++   pushd . > /dev/null 2>&1
++   for volume in /var/lib/glusterd/vols/*; do cd $volume;
++       vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++       volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++       if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++          exit 1;
++       fi
++   done
++
++   popd > /dev/null 2>&1
++   exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-resource-agents_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++   error("Detected running glusterfs processes", ok)
++end
++%endif
++
++
++
++%pretrans server -p <lua>
++if not posix.access("/bin/bash", "x") then
++    -- initial installation, no shell, no running glusterfsd
++    return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++   pushd . > /dev/null 2>&1
++   for volume in /var/lib/glusterd/vols/*; do cd $volume;
++       vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++       volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++       if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++          exit 1;
++       fi
++   done
++
++   popd > /dev/null 2>&1
++   exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-server_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++   error("Detected running glusterfs processes", ok)
++end
++%endif
++
+ %changelog
+ * Wed Mar 6 2019 Kaleb S. KEITHLEY
+ - remove unneeded ldconfig in scriptlets
+--
+1.8.3.1
+
diff --git a/SOURCES/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch b/SOURCES/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch
new file mode 100644
index 0000000..66a39d2
--- /dev/null
+++ b/SOURCES/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch
@@ -0,0 +1,35 @@
+From 0ab54c5b274f29fcdd4787325c7183a84e875bbc Mon Sep 17 00:00:00 2001
+From: "Bala.FA"
+Date: Thu, 22 May 2014 08:37:27 +0530
+Subject: [PATCH 08/52] secalert: remove setuid bit for fusermount-glusterfs
+
+glusterfs-fuse: File /usr/bin/fusermount-glusterfs on x86_64 is setuid
+root but is not on the setxid whitelist
+
+Label: DOWNSTREAM ONLY
+
+Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=989480
+Change-Id: Icf6e5db72ae15ccc60b02be6713fb6c4f4c8a15f
+Signed-off-by: Bala.FA
+Reviewed-on: https://code.engineering.redhat.com/gerrit/25453
+Signed-off-by: Bala.FA
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60135
+Tested-by: Milind Changire
+---
+ contrib/fuse-util/Makefile.am | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/contrib/fuse-util/Makefile.am b/contrib/fuse-util/Makefile.am
+index abbc10e..a071c81 100644
+--- a/contrib/fuse-util/Makefile.am
++++ b/contrib/fuse-util/Makefile.am
+@@ -9,6 +9,5 @@ AM_CFLAGS = -Wall $(GF_CFLAGS)
+ 
+ install-exec-hook:
+ 	-chown root $(DESTDIR)$(bindir)/fusermount-glusterfs
+-	chmod u+s $(DESTDIR)$(bindir)/fusermount-glusterfs
+ 
+ CLEANFILES =
+--
+1.8.3.1
+
diff --git a/SOURCES/0009-build-introduce-security-hardening-flags-in-gluster.patch b/SOURCES/0009-build-introduce-security-hardening-flags-in-gluster.patch
new file mode 100644
index 0000000..7cfe937
--- /dev/null
+++ b/SOURCES/0009-build-introduce-security-hardening-flags-in-gluster.patch
@@ -0,0 +1,57 @@
+From 2adb5d540e9344149ae2591811ad34928775e6fd Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee
+Date: Wed, 3 Jun 2015 11:09:21 +0530
+Subject: [PATCH 09/52] build: introduce security hardening flags in gluster
+
+This patch introduces two of the security hardening compiler flags, RELRO & PIE,
+in the gluster codebase. Using _hardened_build as 1 doesn't guarantee the existence
+of these flags in the compilation, as different versions of RHEL have different
+redhat-rpm-config macros. So the idea is to export these flags at spec file
+level.
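As a quick, illustrative sanity check of the resulting binaries (the path below is an example; any installed gluster binary works), the standard binutils tools can confirm that RELRO and PIE took effect:

    # Full RELRO: a GNU_RELRO segment plus the BIND_NOW dynamic flag.
    readelf -l /usr/sbin/glusterfsd | grep GNU_RELRO
    readelf -d /usr/sbin/glusterfsd | grep -E 'BIND_NOW|FLAGS'
    # PIE: the ELF header type reads DYN rather than EXEC.
    readelf -h /usr/sbin/glusterfsd | grep Type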
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I0a1a56d0a8f54f110d306ba5e55e39b1b073dc84
+Signed-off-by: Atin Mukherjee
+Reviewed-on: https://code.engineering.redhat.com/gerrit/49780
+Reviewed-by: Balamurugan Arumugam
+Tested-by: Balamurugan Arumugam
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60137
+Tested-by: Milind Changire
+---
+ glusterfs.spec.in | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index eb04491..8a31a98 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -736,6 +736,25 @@ done
+ 
+ %build
+ 
++# In RHEL7 few hardening flags are available by default, however the RELRO
++# default behaviour is partial, convert to full
++%if ( 0%{?rhel} && 0%{?rhel} >= 7 )
++LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now"
++export LDFLAGS
++%else
++%if ( 0%{?rhel} && 0%{?rhel} == 6 )
++CFLAGS="$RPM_OPT_FLAGS -fPIE -DPIE"
++LDFLAGS="$RPM_LD_FLAGS -pie -Wl,-z,relro,-z,now"
++%else
++#It appears that with gcc-4.1.2 in RHEL5 there is an issue using both -fPIC and
++ # -fPIE that makes -z relro not work; -fPIE seems to undo what -fPIC does
++CFLAGS="$CFLAGS $RPM_OPT_FLAGS"
++LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now"
++%endif
++export CFLAGS
++export LDFLAGS
++%endif
++
+ ./autogen.sh && %configure \
+     %{?_with_asan} \
+     %{?_with_cmocka} \
+--
+1.8.3.1
+
diff --git a/SOURCES/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch b/SOURCES/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch
new file mode 100644
index 0000000..9226936
--- /dev/null
+++ b/SOURCES/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch
@@ -0,0 +1,100 @@
+From bf5906cbc9bf986c7495db792d098001e28c47e3 Mon Sep 17 00:00:00 2001
+From: Niels de Vos
+Date: Wed, 22 Apr 2015 15:39:59 +0200
+Subject: [PATCH 10/52] spec: fix/add pre-transaction scripts for geo-rep and
+ cli packages
+
+The cli subpackage never had a %pretrans script; this has been added
+now.
+
+The %pretrans script for geo-replication was never included in the RPM
+package because it was disabled by an undefined macro (_can_georeplicate).
+This macro is not used/set anywhere else and _without_georeplication
+should take care of it anyway.
+
+Note: This is a Red Hat Gluster Storage specific patch. Upstream
+      packaging guidelines do not allow these kinds of 'features'.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I16aab5bba72f1ed178f3bcac47f9d8ef767cfcef
+Signed-off-by: Niels de Vos
+Signed-off-by: Bala.FA
+Reviewed-on: https://code.engineering.redhat.com/gerrit/50491
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60138
+Tested-by: Milind Changire
+---
+ glusterfs.spec.in | 43 +++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 41 insertions(+), 2 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 8a31a98..b70dbfc 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1626,6 +1626,47 @@ end
+ 
+ 
+ 
++%pretrans cli -p <lua>
++if not posix.access("/bin/bash", "x") then
++    -- initial installation, no shell, no running glusterfsd
++    return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++   pushd . 
> /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-cli_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ + %pretrans devel -p + if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd +@@ -1708,7 +1749,6 @@ end + + + +-%if 0%{?_can_georeplicate} + %if ( 0%{!?_without_georeplication:1} ) + %pretrans geo-replication -p + if not posix.access("/bin/bash", "x") then +@@ -1749,7 +1789,6 @@ if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end + %endif +-%endif + + + +-- +1.8.3.1 + diff --git a/SOURCES/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch b/SOURCES/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch new file mode 100644 index 0000000..cc79317 --- /dev/null +++ b/SOURCES/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch @@ -0,0 +1,138 @@ +From 40eb62a8872ce061416e899fb6c0784b6253ab16 Mon Sep 17 00:00:00 2001 +From: Niels de Vos +Date: Fri, 7 Dec 2018 14:05:21 +0530 +Subject: [PATCH 11/52] rpm: glusterfs-devel for client-builds should not + depend on -server + +glusterfs-devel for client-side packages should *not* include the +libgfdb.so symlink and libgfdb.pc file or any of the libchangelog +ones. + +Label: DOWNSTREAM ONLY + +Change-Id: Ifb4a9cf48841e5af5dd0a98b6de51e2ee469fc56 +Signed-off-by: Niels de Vos +Reviewed-on: https://code.engineering.redhat.com/gerrit/51019 +Reviewed-by: Balamurugan Arumugam +Tested-by: Balamurugan Arumugam +Reviewed-on: https://code.engineering.redhat.com/gerrit/60139 +Tested-by: Milind Changire +--- + glusterfs.spec.in | 86 +++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 62 insertions(+), 24 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index b70dbfc..1c631db 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -450,30 +450,6 @@ is in user space and easily manageable. + This package provides support to FUSE based clients and inlcudes the + glusterfs(d) binary. + +-%if ( 0%{!?_without_server:1} ) +-%package ganesha +-Summary: NFS-Ganesha configuration +-Group: Applications/File +- +-Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: nfs-ganesha-gluster, pcs, dbus +-%if ( 0%{?rhel} && 0%{?rhel} == 6 ) +-Requires: cman, pacemaker, corosync +-%endif +- +-%description ganesha +-GlusterFS is a distributed file-system capable of scaling to several +-petabytes. It aggregates various storage bricks over Infiniband RDMA +-or TCP/IP interconnect into one large parallel network file +-system. GlusterFS is one of the most sophisticated file systems in +-terms of features and extensibility. It borrows a powerful concept +-called Translators from GNU Hurd kernel. Much of the code in GlusterFS +-is in user space and easily manageable. 
+- +-This package provides the configuration and related files for using +-NFS-Ganesha as the NFS server using GlusterFS +-%endif +- + %if ( 0%{!?_without_georeplication:1} ) + %package geo-replication + Summary: GlusterFS Geo-replication +@@ -1157,6 +1133,62 @@ exit 0 + %exclude %{_datadir}/glusterfs/run-tests.sh + %exclude %{_datadir}/glusterfs/tests + %endif ++%if 0%{?_without_server:1} ++%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-5.8.conf ++%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-7.2.conf ++%exclude %{_sysconfdir}/glusterfs/glusterd.vol ++%exclude %{_sysconfdir}/glusterfs/glusterfs-georep-logrotate ++%exclude %{_sysconfdir}/glusterfs/glusterfs-logrotate ++%exclude %{_sysconfdir}/glusterfs/group-db-workload ++%exclude %{_sysconfdir}/glusterfs/group-distributed-virt ++%exclude %{_sysconfdir}/glusterfs/group-gluster-block ++%exclude %{_sysconfdir}/glusterfs/group-metadata-cache ++%exclude %{_sysconfdir}/glusterfs/group-nl-cache ++%exclude %{_sysconfdir}/glusterfs/group-virt.example ++%exclude %{_sysconfdir}/glusterfs/logger.conf.example ++%exclude %{_sysconfdir}/rsyslog.d/gluster.conf.example ++%exclude %{_prefix}/bin/glusterfind ++%exclude %{_prefix}/lib/firewalld/services/glusterfs.xml ++%exclude %{_prefix}/lib/systemd/system/glusterd.service ++%exclude %{_prefix}/lib/systemd/system/glusterfssharedstorage.service ++%exclude %{_prefix}/lib/tmpfiles.d/gluster.conf ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix-locks.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quotad.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so ++%exclude %{_libexecdir}/glusterfs/* ++%exclude %{_sbindir}/conf.py ++%exclude %{_sbindir}/gcron.py ++%exclude %{_sbindir}/gf_attach ++%exclude %{_sbindir}/gfind_missing_files ++%exclude %{_sbindir}/glfsheal ++%exclude %{_sbindir}/gluster ++%exclude %{_sbindir}/gluster-setgfid2path ++%exclude %{_sbindir}/glusterd ++%exclude %{_sbindir}/snap_scheduler.py ++%exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh ++%exclude %{_datadir}/glusterfs/scripts/control-mem.sh 
++%exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh ++%exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh ++%exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh ++%exclude %{_sharedstatedir}/glusterd/* ++%endif + + %files api + %exclude %{_libdir}/*.so +@@ -1190,7 +1222,13 @@ exit 0 + %exclude %{_includedir}/glusterfs/api + %exclude %{_libdir}/libgfapi.so + %{_libdir}/*.so ++%if ( 0%{?_without_server:1} ) ++%exclude %{_libdir}/pkgconfig/libgfchangelog.pc ++%exclude %{_libdir}/libgfchangelog.so ++%else + %{_libdir}/pkgconfig/libgfchangelog.pc ++%{_libdir}/libgfchangelog.so ++%endif + + %files client-xlators + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +-- +1.8.3.1 + diff --git a/SOURCES/0012-build-add-pretrans-check.patch b/SOURCES/0012-build-add-pretrans-check.patch new file mode 100644 index 0000000..efac62f --- /dev/null +++ b/SOURCES/0012-build-add-pretrans-check.patch @@ -0,0 +1,73 @@ +From f054086daf4549a6227196fe37a57a7e49aa5849 Mon Sep 17 00:00:00 2001 +From: "Bala.FA" +Date: Fri, 7 Dec 2018 14:13:40 +0530 +Subject: [PATCH 12/52] build: add pretrans check + +This patch adds pretrans check for client-xlators + +NOTE: ganesha and python-gluster sub-packages are now obsolete + +Label: DOWNSTREAM ONLY + +Change-Id: I454016319832c11902c0ca79a79fbbcf8ac0a121 +Signed-off-by: Bala.FA +Reviewed-on: https://code.engineering.redhat.com/gerrit/50967 +Reviewed-on: https://code.engineering.redhat.com/gerrit/60140 +Tested-by: Milind Changire +--- + glusterfs.spec.in | 39 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 39 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 1c631db..a1ff6e0 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1704,6 +1704,45 @@ if not (ok == 0) then + end + + ++%pretrans client-xlators -p ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-client-xlators_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. 
tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++   error("Detected running glusterfs processes", ok)
++end
++
+ 
+ %pretrans devel -p
+ if not posix.access("/bin/bash", "x") then
+--
+1.8.3.1
+
diff --git a/SOURCES/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch b/SOURCES/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch
new file mode 100644
index 0000000..5873f3e
--- /dev/null
+++ b/SOURCES/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch
@@ -0,0 +1,50 @@
+From 39932e6bbc8de25813387bb1394cc7942b79ef46 Mon Sep 17 00:00:00 2001
+From: anand
+Date: Wed, 18 Nov 2015 16:13:46 +0530
+Subject: [PATCH 13/52] glusterd: fix info file checksum mismatch during
+ upgrade
+
+peers are moving to the rejected state when upgrading from RHS2.1 to
+RHGS3.1.2 due to a checksum mismatch.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ifea6b7dfe8477c7f17eefc5ca87ced58aaa21c84
+Signed-off-by: anand
+Reviewed-on: https://code.engineering.redhat.com/gerrit/61774
+Reviewed-by: Atin Mukherjee
+Tested-by: Atin Mukherjee
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 51ca3d1..fb52957 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1009,10 +1009,18 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+         goto out;
+     }
+ 
+-    snprintf(buf, sizeof(buf), "%d", volinfo->op_version);
+-    ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf);
+-    if (ret)
+-        goto out;
++    if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
++        snprintf (buf, sizeof (buf), "%d", volinfo->op_version);
++        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf);
++        if (ret)
++            goto out;
++
++        snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version);
++        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
++                                   buf);
++        if (ret)
++            goto out;
++    }
+ 
+     snprintf(buf, sizeof(buf), "%d", volinfo->client_op_version);
+     ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+--
+1.8.3.1
+
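The store keys touched by the patch above land in the plain-text per-volume info file that the %pretrans scripts elsewhere in this series also grep. An illustrative excerpt (values depend on the volume and the cluster's op-version):

    # /var/lib/glusterd/vols/<volname>/info (excerpt, illustrative)
    type=0
    status=1
    op-version=30000
    client-op-version=30000
    # Peers checksum this file; writing the op-version keys only when the
    # running op-version supports them keeps checksums identical across
    # mixed-version pools.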
diff --git a/SOURCES/0014-build-spec-file-conflict-resolution.patch b/SOURCES/0014-build-spec-file-conflict-resolution.patch
new file mode 100644
index 0000000..fb8aeba
--- /dev/null
+++ b/SOURCES/0014-build-spec-file-conflict-resolution.patch
@@ -0,0 +1,72 @@
+From f76d2370160c50a1f59d08a03a444254c289da60 Mon Sep 17 00:00:00 2001
+From: Milind Changire
+Date: Fri, 7 Dec 2018 16:18:07 +0530
+Subject: [PATCH 14/52] build: spec file conflict resolution
+
+Missed conflict resolution for removing references to
+gluster.conf.example, as mentioned in the patch titled
+'packaging: gratuitous dependencies on rsyslog-mm{count,jsonparse}'
+by Kaleb
+
+References to hook scripts S31ganesha-start.sh and
+S31ganesha-reset.sh got lost in the downstream only
+patch conflict resolution.
+
+Commented blanket reference to %{_sharedstatedir}/glusterd/*
+in section %files server to avoid rpmbuild warning related to
+multiple references to hook scripts and other files under
+/var/lib/glusterd.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I9d409f1595ab985ed9f79d9d4f4298877609ba17
+Signed-off-by: Milind Changire
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70535
+Reviewed-by: Rajesh Joseph
+Tested-by: Rajesh Joseph
+---
+ glusterfs.spec.in | 21 +--------------------
+ 1 file changed, 1 insertion(+), 20 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index a1ff6e0..8c57f57 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -96,9 +96,7 @@
+ 
+ # disable server components forcefully as rhel <= 6
+ %if ( 0%{?rhel} )
+-%if ( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" )
+-%global _without_server %{nil}
+-%else
++%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" )))
+ %global _without_server --without-server
+ %endif
+ %endif
+@@ -836,23 +834,6 @@ install -D -p -m 0644 extras/glusterfs-georep-logrotate \
+     %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
+ %endif
+ 
+-%if ( 0%{!?_without_syslog:1} )
+-%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+-install -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \
+-    %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
+-%endif
+-
+-%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+-install -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \
+-    %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
+-%endif
+-
+-%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+-install -D -p -m 0644 extras/logger.conf.example \
+-    %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example
+-%endif
+-%endif
+-
+ %if ( 0%{!?_without_server:1} )
+ touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
+ touch %{buildroot}%{_sharedstatedir}/glusterd/options
+--
+1.8.3.1
+
diff --git a/SOURCES/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch b/SOURCES/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch
new file mode 100644
index 0000000..b82e19b
--- /dev/null
+++ b/SOURCES/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch
@@ -0,0 +1,198 @@
+From 3d0e09400dc21dbb5f76fd9ca4bfce3edad0d626 Mon Sep 17 00:00:00 2001
+From: Milind Changire
+Date: Fri, 14 Oct 2016 12:53:27 +0530
+Subject: [PATCH 15/52] build: randomize temp file names in pretrans scriptlets
+
+Security issue CVE-2015-1795 mentions the possibility of a file name
+spoofing attack against the %pretrans server scriptlet.
+Since %pretrans scriptlets are executed only for server builds, we can
+use os.tmpname() to randomize temporary file names for all %pretrans
+scriptlets using this mechanism.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ic82433897432794b6d311d836355aa4bad886369
+Signed-off-by: Milind Changire
+Reviewed-on: https://code.engineering.redhat.com/gerrit/86187
+Reviewed-by: Siddharth Sharma
+Reviewed-by: Niels de Vos
+Reviewed-by: Atin Mukherjee
+---
+ glusterfs.spec.in | 84 +++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 48 insertions(+), 36 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 8c57f57..3a98822 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1549,9 +1549,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+ 
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1590,9 +1591,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-api_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1631,9 +1633,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-api-devel_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1672,9 +1675,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-cli_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1712,9 +1716,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-client-xlators_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1752,9 +1757,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-devel_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1793,9 +1799,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-fuse_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1835,9 +1842,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-geo-replication_pretrans_" .. 
os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1877,9 +1885,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-libs_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1919,9 +1928,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-rdma_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1962,9 +1972,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-resource-agents_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -2004,9 +2015,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-server_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +-- +1.8.3.1 + diff --git a/SOURCES/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch b/SOURCES/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch new file mode 100644 index 0000000..402b835 --- /dev/null +++ b/SOURCES/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch @@ -0,0 +1,42 @@ +From c283f15ac9bfb1c98ce95ed0000ebed81cd3b318 Mon Sep 17 00:00:00 2001 +From: Poornima G +Date: Wed, 26 Apr 2017 14:07:58 +0530 +Subject: [PATCH 16/52] glusterd, parallel-readdir: Change the op-version of + parallel-readdir to 31100 + +Issue: Downstream 3.2 was released with op-version 31001, parallel-readdir +feature in upstream was released in 3.10 and hence with op-version 31000. +With this, parallel-readdir will be allowed in 3.2 cluster/clients as well. +But 3.2 didn't have parallel-readdir feature backported. 
+
+Fix:
+Increase the op-version of the parallel-readdir feature only in downstream
+to 31100 (3.3's highest op-version).
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I2640520985627f3a1cb4fb96e28350f8bb9b146c
+Signed-off-by: Poornima G
+Reviewed-on: https://code.engineering.redhat.com/gerrit/104403
+Reviewed-by: Atin Mukherjee
+Tested-by: Atin Mukherjee
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index d07fc10..a31ecda 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2718,7 +2718,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+         .option = "parallel-readdir",
+         .value = "off",
+         .type = DOC,
+-        .op_version = GD_OP_VERSION_3_10_0,
++        .op_version = GD_OP_VERSION_3_11_0,
+         .validate_fn = validate_parallel_readdir,
+         .description = "If this option is enabled, the readdir operation "
+                        "is performed in parallel on all the bricks, thus "
+--
+1.8.3.1
+
diff --git a/SOURCES/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch b/SOURCES/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch
new file mode 100644
index 0000000..f536c9c
--- /dev/null
+++ b/SOURCES/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch
@@ -0,0 +1,37 @@
+From 5d3315a53611f23a69f88bc8266448e258e2e10f Mon Sep 17 00:00:00 2001
+From: Samikshan Bairagya
+Date: Mon, 10 Jul 2017 11:54:52 +0530
+Subject: [PATCH 17/52] glusterd: Revert op-version for
+ "cluster.max-brick-per-process"
+
+The op-version for the "cluster.max-brick-per-process" option was
+set to 3.12.0 in the upstream patch and was backported here:
+https://code.engineering.redhat.com/gerrit/#/c/111799. This commit
+reverts the op-version for this option to 3.11.1 instead.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I23639cef43d41915eea0394d019b1e0796a99d7b
+Signed-off-by: Samikshan Bairagya
+Reviewed-on: https://code.engineering.redhat.com/gerrit/111804
+Reviewed-by: Atin Mukherjee
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index a31ecda..9a6fe9f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2794,7 +2794,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+     {.key = GLUSTERD_BRICKMUX_LIMIT_KEY,
+      .voltype = "mgmt/glusterd",
+      .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE,
+-     .op_version = GD_OP_VERSION_3_12_0,
++     .op_version = GD_OP_VERSION_3_11_1,
+      .validate_fn = validate_mux_limit,
+      .type = GLOBAL_DOC,
+      .description = "This option can be used to limit the number of brick "
+--
+1.8.3.1
+
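Both op-version adjustments above are easiest to reason about against the cluster's negotiated op-version, which recent gluster CLIs can report directly (illustrative session; availability of these keys depends on the installed version):

    gluster volume get all cluster.op-version      # current cluster-wide value
    gluster volume get all cluster.max-op-version  # highest value this build supports
    # glusterd rejects setting any option whose op_version is higher than
    # the cluster's current op-version.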
It should also be +made known to users that it is advisable to either have all volumes +in stopped state or have no bricks running before modifying the +"brick-multiplex" option. This commit makes sure these messages +are displayed to the user before brick-multiplexing is enabled or +disabled. + +Label: DOWNSTREAM ONLY + +Change-Id: Ic40294b26c691ea03185c4d1fce840ef23f95718 +Signed-off-by: Samikshan Bairagya +Reviewed-on: https://code.engineering.redhat.com/gerrit/114793 +Reviewed-by: Atin Mukherjee +--- + cli/src/cli-cmd-parser.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index d9913f6..f148c59 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1698,6 +1698,24 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + } + } + ++ if ((strcmp (key, "cluster.brick-multiplex") == 0)) { ++ question = "Brick-multiplexing is supported only for " ++ "container workloads (CNS/CRS). Also it is " ++ "advised to make sure that either all " ++ "volumes are in stopped state or no bricks " ++ "are running before this option is modified." ++ "Do you still want to continue?"; ++ ++ answer = cli_cmd_get_confirmation (state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log ("cli", GF_LOG_ERROR, "Operation " ++ "cancelled, exiting"); ++ *op_errstr = gf_strdup ("Aborted by user."); ++ ret = -1; ++ goto out; ++ } ++ } ++ + ret = dict_set_int32(dict, "count", wordcount - 3); + + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch b/SOURCES/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch new file mode 100644 index 0000000..e1287c9 --- /dev/null +++ b/SOURCES/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch @@ -0,0 +1,99 @@ +From 8a3035bf612943694a3cd1c6a857bd009e84f55d Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Tue, 10 Oct 2017 09:58:24 +0530 +Subject: [PATCH 19/52] build: launch glusterd upgrade after all new bits are + installed + +Problem: +glusterd upgrade mode needs new bits from glusterfs-rdma which +optional and causes the dependency graph to break since it is +not tied into glusterfs-server requirements + +Solution: +Run glusterd upgrade mode after all new bits are installed +i.e. in %posttrans server section + +Label: DOWNSTREAM ONLY + +Change-Id: I356e02d0bf0eaaef43c20ce07b388262f63093a4 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/120094 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Talur +--- + glusterfs.spec.in | 51 +++++++++++++++++++++++++++++---------------------- + 1 file changed, 29 insertions(+), 22 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 3a98822..208a82d 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -946,28 +946,6 @@ fi + %firewalld_reload + %endif + +-pidof -c -o %PPID -x glusterd &> /dev/null +-if [ $? -eq 0 ]; then +- kill -9 `pgrep -f gsyncd.py` &> /dev/null +- +- killall --wait glusterd &> /dev/null +- glusterd --xlator-option *.upgrade=on -N +- +- #Cleaning leftover glusterd socket file which is created by glusterd in +- #rpm_script_t context. 
+- rm -f %{_rundir}/glusterd.socket +- +- # glusterd _was_ running, we killed it, it exited after *.upgrade=on, +- # so start it again +- %service_start glusterd +-else +- glusterd --xlator-option *.upgrade=on -N +- +- #Cleaning leftover glusterd socket file which is created by glusterd in +- #rpm_script_t context. +- rm -f %{_rundir}/glusterd.socket +-fi +-exit 0 + %endif + + ##----------------------------------------------------------------------------- +@@ -2027,6 +2005,35 @@ os.remove(tmpname) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end ++ ++%posttrans server ++pidof -c -o %PPID -x glusterd &> /dev/null ++if [ $? -eq 0 ]; then ++ kill -9 `pgrep -f gsyncd.py` &> /dev/null ++ ++ killall --wait -SIGTERM glusterd &> /dev/null ++ ++ if [ "$?" != "0" ]; then ++ echo "killall failed while killing glusterd" ++ fi ++ ++ glusterd --xlator-option *.upgrade=on -N ++ ++ #Cleaning leftover glusterd socket file which is created by glusterd in ++ #rpm_script_t context. ++ rm -rf /var/run/glusterd.socket ++ ++ # glusterd _was_ running, we killed it, it exited after *.upgrade=on, ++ # so start it again ++ %service_start glusterd ++else ++ glusterd --xlator-option *.upgrade=on -N ++ ++ #Cleaning leftover glusterd socket file which is created by glusterd in ++ #rpm_script_t context. ++ rm -rf /var/run/glusterd.socket ++fi ++ + %endif + + %changelog +-- +1.8.3.1 + diff --git a/SOURCES/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch b/SOURCES/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch new file mode 100644 index 0000000..c00c7f4 --- /dev/null +++ b/SOURCES/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch @@ -0,0 +1,38 @@ +From 968e5e698a070f9e6905a86c9c8338c36fcfa339 Mon Sep 17 00:00:00 2001 +From: moagrawa +Date: Mon, 15 Jan 2018 18:21:27 +0530 +Subject: [PATCH 20/52] spec: unpackaged files found for RHEL-7 client build + +Problem: unpackages files found for RHEL-7 client build + +Solution: Update glusterfs.specs.in to exclude unpackage files +Label: DOWNSTREAM ONLY + +Change-Id: I761188a6a8447105b53bf3334ded963c645cab5b +Signed-off-by: moagrawa +Reviewed-on: https://code.engineering.redhat.com/gerrit/127758 +Tested-by: RHGS Build Bot +Reviewed-by: Milind Changire +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 208a82d..ec06176 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1141,8 +1141,10 @@ exit 0 + %exclude %{_sbindir}/gluster-setgfid2path + %exclude %{_sbindir}/glusterd + %exclude %{_sbindir}/snap_scheduler.py ++%if ( 0%{?_with_systemd:1} ) + %exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh + %exclude %{_datadir}/glusterfs/scripts/control-mem.sh ++%endif + %exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh + %exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh + %exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh +-- +1.8.3.1 + diff --git a/SOURCES/0021-cli-glusterfsd-remove-copyright-information.patch b/SOURCES/0021-cli-glusterfsd-remove-copyright-information.patch new file mode 100644 index 0000000..0aa1d07 --- /dev/null +++ b/SOURCES/0021-cli-glusterfsd-remove-copyright-information.patch @@ -0,0 +1,66 @@ +From fbc7f0e5ac8c292b865a8e02ceed2efa101d145c Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Mon, 12 Mar 2018 19:47:11 +0530 +Subject: [PATCH 21/52] cli/glusterfsd: remove copyright information + 
+There's no point of dumping upstream copyright information in --version. + +Label: DOWNSTREAM ONLY + +Change-Id: I3a10e30878698e1d53082936bbf22bca560a3896 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/132445 +Tested-by: RHGS Build Bot +Reviewed-by: Milind Changire +--- + cli/src/cli.c | 11 +---------- + glusterfsd/src/glusterfsd.c | 11 +---------- + 2 files changed, 2 insertions(+), 20 deletions(-) + +diff --git a/cli/src/cli.c b/cli/src/cli.c +index 84ce0f4..08f117e 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -65,16 +65,7 @@ extern int connected; + /* using argp for command line parsing */ + + const char *argp_program_version = +- "" PACKAGE_NAME " " PACKAGE_VERSION +- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION +- "\n" +- "Copyright (c) 2006-2016 Red Hat, Inc. " +- "\n" +- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" +- "It is licensed to you under your choice of the GNU Lesser\n" +- "General Public License, version 3 or any later version (LGPLv3\n" +- "or later), or the GNU General Public License, version 2 (GPLv2),\n" +- "in all cases as published by the Free Software Foundation."; ++ PACKAGE_NAME" "PACKAGE_VERSION; + const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">"; + + struct rpc_clnt *global_quotad_rpc; +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 5d46b3d..c983882 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -86,16 +86,7 @@ static char argp_doc[] = + "--volfile-server=SERVER [MOUNT-POINT]\n" + "--volfile=VOLFILE [MOUNT-POINT]"; + const char *argp_program_version = +- "" PACKAGE_NAME " " PACKAGE_VERSION +- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION +- "\n" +- "Copyright (c) 2006-2016 Red Hat, Inc. " +- "\n" +- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" +- "It is licensed to you under your choice of the GNU Lesser\n" +- "General Public License, version 3 or any later version (LGPLv3\n" +- "or later), or the GNU General Public License, version 2 (GPLv2),\n" +- "in all cases as published by the Free Software Foundation."; ++ PACKAGE_NAME" "PACKAGE_VERSION; + const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">"; + + static error_t +-- +1.8.3.1 + diff --git a/SOURCES/0022-cli-Remove-upstream-doc-reference.patch b/SOURCES/0022-cli-Remove-upstream-doc-reference.patch new file mode 100644 index 0000000..5f9bf28 --- /dev/null +++ b/SOURCES/0022-cli-Remove-upstream-doc-reference.patch @@ -0,0 +1,40 @@ +From 00db0c44d109e6f3e394487bf76ff28ba2eee7de Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 15 Mar 2018 12:56:02 +0530 +Subject: [PATCH 22/52] cli: Remove upstream doc reference + +...that is displayed while creating replica 2 volumes. + +Label: DOWNSTREAM ONLY + +Change-Id: I16b45c8ad3a33cdd2a464d84f51d006d8f568b23 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/132744 +Reviewed-by: Karthik Subrahmanya +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + cli/src/cli-cmd-parser.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index f148c59..760a10c 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -606,11 +606,8 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words, + "Replica 2 volumes are prone" + " to split-brain. Use " + "Arbiter or Replica 3 to " +- "avoid this. 
See: " +- "http://docs.gluster.org/en/latest/" +- "Administrator%20Guide/" +- "Split%20brain%20and%20ways%20to%20deal%20with%20it/." +- "\nDo you still want to " ++ "avoid this.\n" ++ "Do you still want to " + "continue?\n"; + answer = cli_cmd_get_confirmation(state, question); + if (GF_ANSWER_NO == answer) { +-- +1.8.3.1 + diff --git a/SOURCES/0023-hooks-remove-selinux-hooks.patch b/SOURCES/0023-hooks-remove-selinux-hooks.patch new file mode 100644 index 0000000..3d14855 --- /dev/null +++ b/SOURCES/0023-hooks-remove-selinux-hooks.patch @@ -0,0 +1,148 @@ +From 421743b7cfa6a249544f6abb4cca5a612bd20ea1 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 11 Dec 2018 16:21:43 +0530 +Subject: [PATCH 23/52] hooks: remove selinux hooks + +Label: DOWNSTREAM ONLY + +Change-Id: I810466a0ca99ab21f5a8eac8cdffbb18333d10ad +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/135800 +Tested-by: RHGS Build Bot +Reviewed-by: Jiffin Thottan +Reviewed-by: Milind Changire +--- + configure.ac | 20 -------------------- + extras/hook-scripts/Makefile.am | 2 +- + extras/hook-scripts/create/Makefile.am | 1 - + extras/hook-scripts/create/post/Makefile.am | 8 -------- + extras/hook-scripts/delete/Makefile.am | 1 - + extras/hook-scripts/delete/pre/Makefile.am | 8 -------- + glusterfs.spec.in | 2 -- + 7 files changed, 1 insertion(+), 41 deletions(-) + delete mode 100644 extras/hook-scripts/create/Makefile.am + delete mode 100644 extras/hook-scripts/create/post/Makefile.am + delete mode 100644 extras/hook-scripts/delete/Makefile.am + delete mode 100644 extras/hook-scripts/delete/pre/Makefile.am + +diff --git a/configure.ac b/configure.ac +index 2f341de..0d06f5a 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -214,10 +214,6 @@ AC_CONFIG_FILES([Makefile + extras/hook-scripts/add-brick/Makefile + extras/hook-scripts/add-brick/pre/Makefile + extras/hook-scripts/add-brick/post/Makefile +- extras/hook-scripts/create/Makefile +- extras/hook-scripts/create/post/Makefile +- extras/hook-scripts/delete/Makefile +- extras/hook-scripts/delete/pre/Makefile + extras/hook-scripts/start/Makefile + extras/hook-scripts/start/post/Makefile + extras/hook-scripts/set/Makefile +@@ -909,21 +905,6 @@ fi + AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"]) + dnl end cloudsync section + +-dnl SELinux feature enablement +-case $host_os in +- linux*) +- AC_ARG_ENABLE([selinux], +- AC_HELP_STRING([--disable-selinux], +- [Disable SELinux features]), +- [USE_SELINUX="${enableval}"], [USE_SELINUX="yes"]) +- ;; +- *) +- USE_SELINUX=no +- ;; +-esac +-AM_CONDITIONAL(USE_SELINUX, test "x${USE_SELINUX}" = "xyes") +-dnl end of SELinux feature enablement +- + AC_CHECK_HEADERS([execinfo.h], [have_backtrace=yes]) + if test "x${have_backtrace}" = "xyes"; then + AC_DEFINE(HAVE_BACKTRACE, 1, [define if found backtrace]) +@@ -1599,7 +1580,6 @@ echo "XML output : $BUILD_XML_OUTPUT" + echo "Unit Tests : $BUILD_UNITTEST" + echo "Track priv ports : $TRACK_PRIVPORTS" + echo "POSIX ACLs : $BUILD_POSIX_ACLS" +-echo "SELinux features : $USE_SELINUX" + echo "firewalld-config : $BUILD_FIREWALLD" + echo "Events : $BUILD_EVENTS" + echo "EC dynamic support : $EC_DYNAMIC_SUPPORT" +diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am +index 26059d7..771b37e 100644 +--- a/extras/hook-scripts/Makefile.am ++++ b/extras/hook-scripts/Makefile.am +@@ -1,5 +1,5 @@ + EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh +-SUBDIRS = add-brick create delete set start stop reset ++SUBDIRS = 
add-brick set start stop reset + + scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/ + if USE_GEOREP +diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am +deleted file mode 100644 +index b083a91..0000000 +--- a/extras/hook-scripts/create/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = post +diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am +deleted file mode 100644 +index fd1892e..0000000 +--- a/extras/hook-scripts/create/post/Makefile.am ++++ /dev/null +@@ -1,8 +0,0 @@ +-EXTRA_DIST = S10selinux-label-brick.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/ +-if WITH_SERVER +-if USE_SELINUX +-scripts_SCRIPTS = S10selinux-label-brick.sh +-endif +-endif +diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am +deleted file mode 100644 +index c98a05d..0000000 +--- a/extras/hook-scripts/delete/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = pre +diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am +deleted file mode 100644 +index 4fbfbe7..0000000 +--- a/extras/hook-scripts/delete/pre/Makefile.am ++++ /dev/null +@@ -1,8 +0,0 @@ +-EXTRA_DIST = S10selinux-del-fcontext.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/ +-if WITH_SERVER +-if USE_SELINUX +-scripts_SCRIPTS = S10selinux-del-fcontext.sh +-endif +-endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ec06176..db50b8e 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1413,7 +1413,6 @@ exit 0 + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +@@ -1422,7 +1421,6 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +-- +1.8.3.1 + diff --git a/SOURCES/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch b/SOURCES/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch new file mode 100644 index 0000000..59fe63f --- /dev/null +++ b/SOURCES/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch @@ -0,0 +1,50 @@ +From 79c19f0c6d02228aa8cf4b9299afeb7e0b2ad0da Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Mon, 16 Apr 2018 17:44:19 +0530 +Subject: [PATCH 24/52] glusterd: Make localtime-logging option invisible in + downstream + +Label: DOWNSTREAM ONLY + +Change-Id: Ie631edebb7e19152392bfd3c369a96e88796bd75 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/135754 +Tested-by: RHGS Build Bot +--- + 
xlators/mgmt/glusterd/src/glusterd-op-sm.c      | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 3 ++-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index dd3f9eb..cbbb5d9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -86,7 +86,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {
+     * TBD: Discuss the default value for this. Maybe this should be a
+     * dynamic value depending on the memory specifications per node */
+    {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE},
+-    {GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},
++    /*{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/
+    {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"},
+    {NULL},
+ };
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 9a6fe9f..fed2864 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2850,10 +2850,11 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+     "to have enabled when clients and/or bricks support "
+     "SELinux."},
+     {.key = GLUSTERD_LOCALTIME_LOGGING_KEY,
++    /*{.key = GLUSTERD_LOCALTIME_LOGGING_KEY,
+     .voltype = "mgmt/glusterd",
+     .type = GLOBAL_DOC,
+     .op_version = GD_OP_VERSION_3_12_0,
+-    .validate_fn = validate_boolean},
++    .validate_fn = validate_boolean},*/
+     {.key = GLUSTERD_DAEMON_LOG_LEVEL_KEY,
+     .voltype = "mgmt/glusterd",
+     .type = GLOBAL_NO_DOC,
+--
+1.8.3.1
+
diff --git a/SOURCES/0025-build-make-RHGS-version-available-for-server.patch b/SOURCES/0025-build-make-RHGS-version-available-for-server.patch
new file mode 100644
index 0000000..90f2592
--- /dev/null
+++ b/SOURCES/0025-build-make-RHGS-version-available-for-server.patch
@@ -0,0 +1,45 @@
+From 12ae1a9a62c2c94af44f55b03575ab8806bd22ee Mon Sep 17 00:00:00 2001
+From: Milind Changire
+Date: Mon, 23 Apr 2018 13:16:30 +0530
+Subject: [PATCH 25/52] build: make RHGS version available for server
+
+Make /usr/share/glusterfs/release available for the glusterfs-server
+package. This file contains the RHGS release number.
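+
+A brief usage sketch (the path expands from %{_datadir}; "RHGS 3.5" is the
+value written by this patch, and the commented line is illustrative output):
+
+    # query the RHGS release a server build belongs to
+    cat /usr/share/glusterfs/release
+    # RHGS 3.5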
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I7485f77cfb8ca7f0f8363a20124900ae9ae8a528
+Signed-off-by: Milind Changire
+Reviewed-on: https://code.engineering.redhat.com/gerrit/137139
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ glusterfs.spec.in | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index db50b8e..bdb47ba 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -862,6 +862,10 @@ install -p -m 0744 -D extras/command-completion/gluster.bash \
+    %{buildroot}%{_sysconfdir}/bash_completion.d/gluster
+ %endif
+
++%if ( 0%{!?_without_server:1} )
++echo "RHGS 3.5" > %{buildroot}%{_datadir}/glusterfs/release
++%endif
++
+ %clean
+ rm -rf %{buildroot}
+
+@@ -1452,6 +1456,7 @@ exit 0
+
+ # Extra utility script
+ %dir %{_libexecdir}/glusterfs
++     %{_datadir}/glusterfs/release
+ %dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
+ %if ( 0%{?_with_systemd:1} )
+--
+1.8.3.1
+
diff --git a/SOURCES/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch b/SOURCES/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch
new file mode 100644
index 0000000..8aa9fde
--- /dev/null
+++ b/SOURCES/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch
@@ -0,0 +1,68 @@
+From a3538a7d1fb7674acdf0934847f4004d8fbc4709 Mon Sep 17 00:00:00 2001
+From: Milind Changire
+Date: Tue, 11 Dec 2018 17:57:50 +0530
+Subject: [PATCH 26/52] glusterd: Introduce daemon-log-level cluster wide
+ option
+
+This option, applicable to the node-level daemons, can be very helpful in
+controlling the log level of these services. Please note that any daemon
+which is started prior to setting a specific value for this option (if
+not INFO) will need to go through a restart for this change to take
+effect.
+
+> upstream patch : https://review.gluster.org/#/c/20442/
+
+Please note there's a difference in the downstream delta. The op-version
+against this option is already tagged as 3_11_2 in RHGS 3.3.1 and hence
+the same is retained. 
Marking this DOWNSTREAM_ONLY label because of + +Label: DOWNSTREAM ONLY + +IMPORTANT: +This patch only sets .op_version in glusterd-volume-set.c to +GD_OP_VERSION_3_11_2 as per Atin's recommendation on +Tue, Dec 11, 2018 5:46pm IST + +>Change-Id: I7f6d2620bab2b094c737f5cc816bc093e9c9c4c9 +>fixes: bz#1597473 +>Signed-off-by: Atin Mukherjee + +Change-Id: I7f6d2620bab2b094c737f5cc816bc093e9c9c4c9 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/143137 +Tested-by: RHGS Build Bot +Reviewed-by: Sanju Rakonde +--- + libglusterfs/src/glusterfs/globals.h | 2 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index b9da872..a278f18 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -104,6 +104,8 @@ + + #define GD_OP_VERSION_3_11_1 31101 /* Op-version for GlusterFS 3.11.1 */ + ++#define GD_OP_VERSION_3_11_2 31102 /* Op-version for GlusterFS 3.11.2 */ ++ + #define GD_OP_VERSION_3_12_0 31200 /* Op-version for GlusterFS 3.12.0 */ + + #define GD_OP_VERSION_3_12_2 31202 /* Op-version for GlusterFS 3.12.2 */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index fed2864..84f2705 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2859,7 +2859,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "mgmt/glusterd", + .type = GLOBAL_NO_DOC, + .value = "INFO", +- .op_version = GD_OP_VERSION_5_0}, ++ .op_version = GD_OP_VERSION_3_11_2}, + {.key = "debug.delay-gen", + .voltype = "debug/delay-gen", + .option = "!debug", +-- +1.8.3.1 + diff --git a/SOURCES/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch b/SOURCES/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch new file mode 100644 index 0000000..76b430c --- /dev/null +++ b/SOURCES/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch @@ -0,0 +1,50 @@ +From 9be3c4745b161f1815f77cd19b550ac9795845f5 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 20 Sep 2018 22:01:05 +0530 +Subject: [PATCH 27/52] glusterd: change op-version of fips-mode-rchecksum + +..to GD_OP_VERSION_3_13_3 since GD_OP_VERSION_4_0_0 is not present in +rhgs-3.4.1 + +Label: DOWNSTREAM ONLY + +Change-Id: I759272748177d174b15123faffc2305f7a5ec58f +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/150714 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/globals.h | 2 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index a278f18..4a82889 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -118,6 +118,8 @@ + + #define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */ + ++#define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ ++ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 84f2705..2bd0a9c 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2329,7 +2329,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = "storage.fips-mode-rchecksum", + .type = NO_DOC, + .voltype = "storage/posix", +- .op_version = GD_OP_VERSION_4_0_0, ++ .op_version = GD_OP_VERSION_3_13_3, + }, + { + .option = "force-create-mode", +-- +1.8.3.1 + diff --git a/SOURCES/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch b/SOURCES/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch new file mode 100644 index 0000000..b39c16b --- /dev/null +++ b/SOURCES/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch @@ -0,0 +1,52 @@ +From 64ffcf770c5c0087f8937b5235ed0ad5b0efe7f2 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Wed, 12 Sep 2018 21:41:35 +0530 +Subject: [PATCH 28/52] glusterd: Reset op-version for + "features.shard-deletion-rate" + +The op-version for the "features.shard-deletion-rate" option was set to +4.2.0 in the upstream patch and backported at +e75be952569eb69325d5f505f7ab94aace31be52. +This commit reverts the op-version for this option to 3.13.3. + +Label: DOWNSTREAM ONLY + +Change-Id: Ie3d12f3119ad7a4b40d81bd8bd6ed591658e8371 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/154865 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/globals.h | 2 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 4a82889..4d95f75 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -120,6 +120,8 @@ + + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + ++#define GD_OP_VERSION_3_13_4 31304 /* Op-version for GlusterFS 3.13.4 */ ++ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 2bd0a9c..2f3271f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2552,7 +2552,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + }, + {.key = "features.shard-deletion-rate", + .voltype = "features/shard", +- .op_version = GD_OP_VERSION_5_0, ++ .op_version = GD_OP_VERSION_3_13_4, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.scrub-throttle", +-- +1.8.3.1 + diff --git a/SOURCES/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch b/SOURCES/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch new file mode 100644 index 0000000..752a81b --- /dev/null +++ b/SOURCES/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch @@ -0,0 +1,39 @@ +From b504052d003aa41fbd44eec286d1733b6f2a168e Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Tue, 6 Nov 2018 18:44:55 +0530 +Subject: [PATCH 29/52] glusterd: Reset op-version for + "features.shard-lru-limit" + +The op-version for the "features.shard-lru-limit" option was set to +4.2.0 in the upstream patch and backported at +41e7e33c6512e98a1567e5a5532d3898b59cfa98 + +This commit reverts the op-version for this option to 3.13.4. 
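+
+A hedged sketch of what the revert permits (the volume name "testvol" and
+the value 1024 are illustrative only):
+
+    # allowed once the cluster op-version reaches 31304
+    gluster volume set testvol features.shard-lru-limit 1024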
+ +Label: DOWNSTREAM ONLY + +Change-Id: I7d3ed6b373851267c78fc6815a83bee2c0906413 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/155127 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 2f3271f..4bf89a6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2546,7 +2546,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + { + .key = "features.shard-lru-limit", + .voltype = "features/shard", +- .op_version = GD_OP_VERSION_5_0, ++ .op_version = GD_OP_VERSION_3_13_4, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .type = NO_DOC, + }, +-- +1.8.3.1 + diff --git a/SOURCES/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch b/SOURCES/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch new file mode 100644 index 0000000..b50236d --- /dev/null +++ b/SOURCES/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch @@ -0,0 +1,42 @@ +From 1d2d29396ee25f09c7d379a992ac9bd244e89c39 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Thu, 13 Dec 2018 14:28:57 +0530 +Subject: [PATCH 30/52] selinux/glusterd : add "features.selinux" to + glusterd-volume-set.c + +updates: #593 +Change-Id: I38675ba4d47c8ba7f94cfb4734692683ddb3dcfd +Signed-off-by: Jiffin Tony Thottan +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 4bf89a6..11265bf 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1203,10 +1203,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "performance/io-threads", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0}, +- {.key = "performance.least-rate-limit", +- .voltype = "performance/io-threads", +- .op_version = 1 +- }, ++ {.key = "performance.least-rate-limit", ++ .voltype = "performance/io-threads", ++ .op_version = 1}, + + /* Other perf xlators' options */ + {.key = "performance.io-cache-pass-through", +@@ -2849,7 +2848,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "trusted.gluster.selinux on the bricks. Recommended " + "to have enabled when clients and/or bricks support " + "SELinux."}, +- {.key = GLUSTERD_LOCALTIME_LOGGING_KEY, + /*{.key = GLUSTERD_LOCALTIME_LOGGING_KEY, + .voltype = "mgmt/glusterd", + .type = GLOBAL_DOC, +-- +1.8.3.1 + diff --git a/SOURCES/0031-glusterd-turn-off-selinux-feature-in-downstream.patch b/SOURCES/0031-glusterd-turn-off-selinux-feature-in-downstream.patch new file mode 100644 index 0000000..a7e1e26 --- /dev/null +++ b/SOURCES/0031-glusterd-turn-off-selinux-feature-in-downstream.patch @@ -0,0 +1,34 @@ +From c3176144e531e22bfe97d0fef3b0e3e449fb1d32 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Mon, 16 Apr 2018 13:47:12 +0530 +Subject: [PATCH 31/52] glusterd: turn off selinux feature in downstream + +In RHGS 3.4.0 selinux feature was never meant to be qualified. 
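+
+A minimal sketch of the resulting default, assuming a hypothetical volume
+named "testvol" (the commented output line is illustrative):
+
+    gluster volume get testvol features.selinux
+    # features.selinux    off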
+ +Label: DOWNSTREAM ONLY + +Change-Id: I0cd5eb5207a757c8b6ef789980c061f211410bd5 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/135716 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 11265bf..d1244e4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2842,7 +2842,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = VKEY_FEATURES_SELINUX, + .voltype = "features/selinux", + .type = NO_DOC, +- .value = "on", ++ .value = "off", + .op_version = GD_OP_VERSION_3_11_0, + .description = "Convert security.selinux xattrs to " + "trusted.gluster.selinux on the bricks. Recommended " +-- +1.8.3.1 + diff --git a/SOURCES/0032-glusterd-update-gd-op-version-to-3_7_0.patch b/SOURCES/0032-glusterd-update-gd-op-version-to-3_7_0.patch new file mode 100644 index 0000000..82b34e3 --- /dev/null +++ b/SOURCES/0032-glusterd-update-gd-op-version-to-3_7_0.patch @@ -0,0 +1,29 @@ +From bfa7055c3901b34a49f7933ea9edcf6465843de1 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Wed, 23 Jan 2019 14:22:00 +0530 +Subject: [PATCH 32/52] glusterd: update gd-op-version to 3_7_0 + +Label: DOWNSTREAM ONLY + +Change-Id: Ia6456134cd7e544a415692d09cd1ccbb6e02dd82 +Signed-off-by: Milind Changire +--- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index 6365b6e..e20e3c4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -1174,7 +1174,7 @@ glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr) + * 'force' + */ + ret = glusterd_check_client_op_version_support( +- volname, GD_OP_VERSION_3_6_0, NULL); ++ volname, GD_OP_VERSION_3_7_0, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " +-- +1.8.3.1 + diff --git a/SOURCES/0033-build-add-missing-explicit-package-dependencies.patch b/SOURCES/0033-build-add-missing-explicit-package-dependencies.patch new file mode 100644 index 0000000..57c2919 --- /dev/null +++ b/SOURCES/0033-build-add-missing-explicit-package-dependencies.patch @@ -0,0 +1,83 @@ +From 52e2d75c2c8e32d2e4f69840e34d21b39279284a Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Thu, 13 Dec 2018 12:46:56 +0530 +Subject: [PATCH 33/52] build: add missing explicit package dependencies + +Add dependencies for glusterfs-libs, and other packages. +This is an Errata Tool whine. + +Label: DOWNSTREAM ONLY + +Change-Id: Ieaadb6e4ffa84d1811aa740f7891855568ecbcbb +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/158501 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index bdb47ba..9cd4372 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -323,6 +323,7 @@ and client framework. 
+ Summary: GlusterFS api library + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description api + GlusterFS is a distributed file-system capable of scaling to several +@@ -340,6 +341,7 @@ Summary: Development Libraries + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-devel%{?_isa} = %{version}-%{release} + Requires: libacl-devel ++Requires: %{name}-api%{?_isa} = %{version}-%{release} + + %description api-devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -391,6 +393,8 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + %if ( 0%{!?_without_extra_xlators:1} ) + Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release} + %endif ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++Requires: %{name}-server%{?_isa} = %{version}-%{release} + + %description devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -435,6 +439,7 @@ Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} + + Obsoletes: %{name}-client < %{version}-%{release} + Provides: %{name}-client = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description fuse + GlusterFS is a distributed file-system capable of scaling to several +@@ -459,6 +464,7 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release} + + Requires: rsync + Requires: util-linux ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description geo-replication + GlusterFS is a distributed file-system capable of scaling to several +@@ -536,6 +542,7 @@ BuildRequires: libibverbs-devel + BuildRequires: librdmacm-devel >= 1.0.15 + %endif + Requires: %{name}%{?_isa} = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description rdma + GlusterFS is a distributed file-system capable of scaling to several +@@ -664,6 +671,7 @@ This package provides the glusterfs thin-arbiter translator. + + %package client-xlators + Summary: GlusterFS client-side translators ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description client-xlators + GlusterFS is a distributed file-system capable of scaling to several +-- +1.8.3.1 + diff --git a/SOURCES/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch b/SOURCES/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch new file mode 100644 index 0000000..587a25a --- /dev/null +++ b/SOURCES/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch @@ -0,0 +1,59 @@ +From 463a920541a7579f2407f22597e4014494422804 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Mon, 17 Dec 2018 14:07:01 +0530 +Subject: [PATCH 34/52] glusterd: introduce a new op-version for rhgs-3.4.3 + +This patch introduces a new op-version 31305 for rhgs-3.4.3 and +sets the max op-version to 31305. + +For migrating profile commands (commit e68845ff7018e5d81d7979684b18e6eda449b088) +we used GD_OP_VERSION_6_0 in upstream. we are changing +it to GD_OP_VERSION_3_13_5 here. 
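+
+A hedged sketch of the upgrade flow this enables (the volume name "testvol"
+is hypothetical; the commands are illustrative, not part of this patch):
+
+    # after all peers are upgraded, move the cluster to the new op-version
+    gluster volume set all cluster.op-version 31305
+    # profile commands are then routed through the mgmt_v3 framework
+    gluster volume profile testvol info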
+ +Label: DOWNSTREAM ONLY + +Change-Id: Ie3a05c70eb4e406889c468343f54e999b1218f19 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158795 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/glusterfs/globals.h | 2 ++ + xlators/mgmt/glusterd/src/glusterd-handler.c | 4 ++-- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 4d95f75..6642ba0 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -122,6 +122,8 @@ + + #define GD_OP_VERSION_3_13_4 31304 /* Op-version for GlusterFS 3.13.4 */ + ++#define GD_OP_VERSION_3_13_5 31305 /* Op-version for GlusterFS 3.13.5 */ ++ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 387643d..de44af7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3085,12 +3085,12 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req) + goto out; + } + +- if (conf->op_version < GD_OP_VERSION_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_13_5) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. Falling back " + "to op-sm framework.", +- GD_OP_VERSION_6_0); ++ GD_OP_VERSION_3_13_5); + ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str)); + glusterd_friend_sm(); + glusterd_op_sm(); +-- +1.8.3.1 + diff --git a/SOURCES/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch b/SOURCES/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch new file mode 100644 index 0000000..643ba3a --- /dev/null +++ b/SOURCES/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch @@ -0,0 +1,41 @@ +From 254033a80d85460675c921c272fb94bb7e9f67d4 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 18 Dec 2018 17:57:25 +0530 +Subject: [PATCH 35/52] glusterd: tag rebalance mgmt_v3 command to op-version + 31305 + +In upstream migrating rebalance command is tagged to op-version 60000 +but in downstream the latest new op-version is 31305. + +Label: DOWNSTREAM ONLY + +Change-Id: I30bbad3efca29bf42b9a750581eb1aebc8a30ff9 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/158943 +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index e20e3c4..ed5ded5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -573,12 +573,12 @@ __glusterd_handle_defrag_volume(rpcsvc_request_t *req) + } else + op = GD_OP_REBALANCE; + +- if (priv->op_version < GD_OP_VERSION_6_0) { ++ if (priv->op_version < GD_OP_VERSION_3_13_5) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. 
Falling back " + "to op-sm framework.", +- GD_OP_VERSION_6_0); ++ GD_OP_VERSION_3_13_5); + ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg)); + glusterd_friend_sm(); + glusterd_op_sm(); +-- +1.8.3.1 + diff --git a/SOURCES/0036-build-add-conditional-dependency-on-server-for-devel.patch b/SOURCES/0036-build-add-conditional-dependency-on-server-for-devel.patch new file mode 100644 index 0000000..352078b --- /dev/null +++ b/SOURCES/0036-build-add-conditional-dependency-on-server-for-devel.patch @@ -0,0 +1,47 @@ +From d6458c40706d8886187bd9c2016087a3a1eee882 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Wed, 19 Dec 2018 13:17:42 +0530 +Subject: [PATCH 36/52] build: add conditional dependency on server for devel + +Add conditional depedency on server for glusterfs-devel + +Label: DOWNSTREAM ONLY + +Change-Id: Icc45df3db137dbc03d240c1ac774b5c8735c5f2f +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/159030 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 9cd4372..9db5a34 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -394,7 +394,9 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release} + %endif + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++%if ( 0%{!?_without_server:1} ) + Requires: %{name}-server%{?_isa} = %{version}-%{release} ++%endif + + %description devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -2067,6 +2069,11 @@ fi + * Thu Feb 21 2019 Jiffin Tony Thottan + - Obsoleting gluster-gnfs package + ++* Wed Dec 19 2018 Milind Changire ++- Add explicit package dependencies (#1656357) ++- Remove absolute paths from spec file (#1350745) ++- Do not package crypt.so for FIPS compliance (#1653224) ++ + * Wed Nov 28 2018 Krutika Dhananjay + - Install /var/lib/glusterd/groups/distributed-virt by default + +-- +1.8.3.1 + diff --git a/SOURCES/0037-cli-change-the-warning-message.patch b/SOURCES/0037-cli-change-the-warning-message.patch new file mode 100644 index 0000000..e4a4544 --- /dev/null +++ b/SOURCES/0037-cli-change-the-warning-message.patch @@ -0,0 +1,35 @@ +From 7e0342e0d01204f136b0bd28931a6313ea216649 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 6 Feb 2019 19:06:45 +0530 +Subject: [PATCH 37/52] cli: change the warning message + +This patch changes the warning message user gets, when enabling brick +multiplexing to reflect OCS instead of CNS/CRS. + +Label: DOWNSTREAM ONLY + +Change-Id: Id5fd87955d5a692f8e57560245f8b0cf9882e1da +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/162405 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + cli/src/cli-cmd-parser.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 760a10c..541dc62 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1697,7 +1697,7 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + + if ((strcmp (key, "cluster.brick-multiplex") == 0)) { + question = "Brick-multiplexing is supported only for " +- "container workloads (CNS/CRS). Also it is " ++ "OCS converged or independent mode. 
Also it is " + "advised to make sure that either all " + "volumes are in stopped state or no bricks " + "are running before this option is modified." +-- +1.8.3.1 + diff --git a/SOURCES/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch b/SOURCES/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch new file mode 100644 index 0000000..00a5af3 --- /dev/null +++ b/SOURCES/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch @@ -0,0 +1,230 @@ +From a577dd0a3cbf435681f10d095a0dca0595c6a354 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Sat, 9 Feb 2019 14:01:28 +0530 +Subject: [PATCH 38/52] spec: avoid creation of temp file in lua script + +Avoiding creation of temporary file to execute bash shell script from a +lua scriptlet increases install time security. + +Label: DOWNSTREAM ONLY + +Change-Id: Ie5b9035f292402b18dea768aca8bc82a1e7fa615 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/162621 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 120 ++++++------------------------------------------------ + 1 file changed, 12 insertions(+), 108 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 9db5a34..df8d116 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1542,15 +1542,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1584,15 +1576,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1626,15 +1610,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1668,15 +1644,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1709,15 +1677,7 @@ if [ $? 
-eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1750,15 +1710,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1792,15 +1744,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1835,15 +1779,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1878,15 +1814,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1921,15 +1849,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1965,15 +1885,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. 
+-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2008,15 +1920,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +-- +1.8.3.1 + diff --git a/SOURCES/0039-cli-fix-query-to-user-during-brick-mux-selection.patch b/SOURCES/0039-cli-fix-query-to-user-during-brick-mux-selection.patch new file mode 100644 index 0000000..82684cb --- /dev/null +++ b/SOURCES/0039-cli-fix-query-to-user-during-brick-mux-selection.patch @@ -0,0 +1,61 @@ +From ec707e099d4e4338d1ea21560d367b02e6339532 Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya +Date: Mon, 1 Apr 2019 16:16:47 +0530 +Subject: [PATCH 39/52] cli: fix query to user during brick-mux selection + +Label: DOWNSTREAM ONLY + +Change-Id: I59472066b917ea2b23de72bcd91dc3e275d5e055 +Signed-off-by: Sunil Kumar Acharya +--- + cli/src/cli-cmd-parser.c | 33 +++++++++++++++++---------------- + 1 file changed, 17 insertions(+), 16 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 541dc62..d9ccba1 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1693,23 +1693,24 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + goto out; + } + } +- } +- +- if ((strcmp (key, "cluster.brick-multiplex") == 0)) { +- question = "Brick-multiplexing is supported only for " +- "OCS converged or independent mode. Also it is " +- "advised to make sure that either all " +- "volumes are in stopped state or no bricks " +- "are running before this option is modified." +- "Do you still want to continue?"; + +- answer = cli_cmd_get_confirmation (state, question); +- if (GF_ANSWER_NO == answer) { +- gf_log ("cli", GF_LOG_ERROR, "Operation " +- "cancelled, exiting"); +- *op_errstr = gf_strdup ("Aborted by user."); +- ret = -1; +- goto out; ++ if ((strcmp (key, "cluster.brick-multiplex") == 0)) { ++ question = ++ "Brick-multiplexing is supported only for " ++ "OCS converged or independent mode. Also it is " ++ "advised to make sure that either all " ++ "volumes are in stopped state or no bricks " ++ "are running before this option is modified." 
++ "Do you still want to continue?"; ++ ++ answer = cli_cmd_get_confirmation (state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log ("cli", GF_LOG_ERROR, "Operation " ++ "cancelled, exiting"); ++ *op_errstr = gf_strdup ("Aborted by user."); ++ ret = -1; ++ goto out; ++ } + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/0040-build-Remove-unsupported-test-cases-failing-consiste.patch b/SOURCES/0040-build-Remove-unsupported-test-cases-failing-consiste.patch new file mode 100644 index 0000000..1d10507 --- /dev/null +++ b/SOURCES/0040-build-Remove-unsupported-test-cases-failing-consiste.patch @@ -0,0 +1,136 @@ +From 79c74009892804419dce264399f3fde357d5b1c3 Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Tue, 2 Apr 2019 11:07:03 +0530 +Subject: [PATCH 40/52] build: Remove unsupported test cases failing + consistently + +The following two test cases failing in downstream regression runs. +Hence removing them as they are not supported downstream. + +tests/basic/cloudsync-sanity.t +tests/bugs/distribute/bug-882278.t + +Label: DOWNSTREAM ONLY + +Change-Id: Ie4b506639a017ec9910e44df1b721d9bfadf07b3 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/166662 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +Tested-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/cloudsync-sanity.t | 22 ------------ + tests/bugs/distribute/bug-882278.t | 73 -------------------------------------- + 2 files changed, 95 deletions(-) + delete mode 100644 tests/basic/cloudsync-sanity.t + delete mode 100755 tests/bugs/distribute/bug-882278.t + +diff --git a/tests/basic/cloudsync-sanity.t b/tests/basic/cloudsync-sanity.t +deleted file mode 100644 +index 3cf719d..0000000 +--- a/tests/basic/cloudsync-sanity.t ++++ /dev/null +@@ -1,22 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../include.rc +-. $(dirname $0)/../volume.rc +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9}; +-TEST $CLI volume set $V0 features.cloudsync enable; +-TEST $CLI volume start $V0; +- +-## Mount FUSE +-TEST $GFS -s $H0 --volfile-id $V0 $M1; +- +-# This test covers lookup, mkdir, mknod, symlink, link, rename, +-# create operations +-TEST $(dirname $0)/rpc-coverage.sh $M1 +- +-cleanup; +diff --git a/tests/bugs/distribute/bug-882278.t b/tests/bugs/distribute/bug-882278.t +deleted file mode 100755 +index 8cb5147..0000000 +--- a/tests/bugs/distribute/bug-882278.t ++++ /dev/null +@@ -1,73 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-cleanup +- +-# Is there a good reason to require --fqdn elsewhere? It's worse than useless +-# here. +-H0=$(hostname -s) +- +-function recreate { +- # The rm is necessary so we don't get fooled by leftovers from old runs. 
+- rm -rf $1 && mkdir -p $1
+-}
+-
+-function count_lines {
+- grep "$1" $2/* | wc -l
+-}
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume info;
+-
+-## Start and create a volume
+-TEST recreate ${B0}/${V0}-0
+-TEST recreate ${B0}/${V0}-1
+-TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1}
+-TEST $CLI volume set $V0 cluster.nufa on
+-
+-function volinfo_field()
+-{
+- local vol=$1;
+- local field=$2;
+-
+- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+-}
+-
+-
+-## Verify volume is created
+-EXPECT "$V0" volinfo_field $V0 'Volume Name';
+-EXPECT 'Created' volinfo_field $V0 'Status';
+-
+-## Start volume and verify
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status';
+-
+-## Mount native
+-special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1"
+-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0
+-
+-## Create a bunch of test files.
+-for i in $(seq 0 99); do
+- echo hello > $(printf $M0/file%02d $i)
+-done
+-
+-## Make sure the files went to the right place. There might be link files in
+-## the other brick, but they won't have any contents.
+-EXPECT "0" count_lines hello ${B0}/${V0}-0
+-EXPECT "100" count_lines hello ${B0}/${V0}-1
+-
+-if [ "$EXIT_EARLY" = "1" ]; then
+- exit 0;
+-fi
+-
+-## Finish up
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+-TEST $CLI volume stop $V0;
+-EXPECT 'Stopped' volinfo_field $V0 'Status';
+-
+-TEST $CLI volume delete $V0;
+-TEST ! $CLI volume info $V0;
+-
+-cleanup;
+--
+1.8.3.1
+
diff --git a/SOURCES/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch b/SOURCES/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch
new file mode 100644
index 0000000..c1e1720
--- /dev/null
+++ b/SOURCES/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch
@@ -0,0 +1,43 @@
+From c8f0ac9b429e1ff73a3e87247193c35c66212540 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya
+Date: Tue, 2 Apr 2019 12:06:53 +0530
+Subject: [PATCH 41/52] tests/geo-rep: Build failed in Jenkins for test
+ bug-1600145.t
+
+Problem: the ((strcmp (key, "cluster.brick-multiplex") == 0))
+comparison in cli/src/cli-cmd-parser.c is expecting
+either a yes or no confirmation from the cli, which is not handled
+in bug-1600145.t, causing the test to wait till timeout and
+then fail.
+
+Solution: Piping yes into
+`gluster v set all cluster.brick-multiplex on` in bug-1600145.t
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I1a6c2a992b65380cea145fd1c46d22ec1251c77e
+Signed-off-by: Shwetha K Acharya
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166694
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+Tested-by: Sunil Kumar Heggodu Gopala Acharya
+Reviewed-by: Sunny Kumar
+---
+ tests/00-geo-rep/bug-1600145.t | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tests/00-geo-rep/bug-1600145.t b/tests/00-geo-rep/bug-1600145.t
+index 1d38bf9..359bc4f 100644
+--- a/tests/00-geo-rep/bug-1600145.t
++++ b/tests/00-geo-rep/bug-1600145.t
+@@ -29,7 +29,7 @@ slave_mnt=$M1
+
+ ##create_and_start_master_volume
+ TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2};
+-gluster v set all cluster.brick-multiplex on
++yes | gluster v set all cluster.brick-multiplex on
+ TEST $CLI volume start $GMV0
+
+ ##create_and_start_slave_volume
+--
+1.8.3.1
+
diff --git a/SOURCES/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch b/SOURCES/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch
new file mode 100644
index 0000000..7e3d69f
--- /dev/null
+++ b/SOURCES/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch
@@ -0,0 +1,123 @@
+From f25a92028ecc2018953a6375bba43a21d3a93566 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Thu, 4 Apr 2019 16:18:51 +0530
+Subject: [PATCH 42/52] spec: (client|server) Builds are failing on rhel-6
+
+Problem: 1) For the server rpm, the gluster build throws an
+            "undefined reference to `dlclose`" error on RHEL 6
+         2) For the server rpm, the build throws "File not found"
+            errors for rot-13.so and symlink-cache.so
+         3) For the client rpms, the build throws "File not found"
+            errors for all files listed with %exclude
+            in the without_server check
+
+Solution: 1) For the server rpm, add the LIB_DL link in the Makefile
+             and remove the references to rot-13.so and symlink-cache.so
+             from glusterfs.spec.in
+          2) Remove the excluded-files list, as those files are not
+             being built
+
+Label: DOWNSTREAM ONLY
+Change-Id: I2b41604cbc8525b91231b0c5caee588c5d5d6b08
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166962
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ glusterfs.spec.in | 54 -----------------------------------
+ xlators/mgmt/glusterd/src/Makefile.am | 2 +-
+ 2 files changed, 1 insertion(+), 55 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index df8d116..7c7f7c0 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1097,72 +1097,18 @@ exit 0
+ %{_tmpfilesdir}/gluster.conf
+ %endif
+ %if ( 0%{?_without_extra_xlators:1} )
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13.so
+ %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
+ %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache.so
+ %endif
+ %if ( 0%{?_without_regression_tests:1} )
+ %exclude %{_datadir}/glusterfs/run-tests.sh
+ %exclude %{_datadir}/glusterfs/tests
+ %endif
+ %if 0%{?_without_server:1}
+-%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-5.8.conf
+-%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-7.2.conf
+-%exclude %{_sysconfdir}/glusterfs/glusterd.vol
+-%exclude %{_sysconfdir}/glusterfs/glusterfs-georep-logrotate
+-%exclude %{_sysconfdir}/glusterfs/glusterfs-logrotate
+-%exclude 
%{_sysconfdir}/glusterfs/group-db-workload +-%exclude %{_sysconfdir}/glusterfs/group-distributed-virt +-%exclude %{_sysconfdir}/glusterfs/group-gluster-block +-%exclude %{_sysconfdir}/glusterfs/group-metadata-cache +-%exclude %{_sysconfdir}/glusterfs/group-nl-cache +-%exclude %{_sysconfdir}/glusterfs/group-virt.example +-%exclude %{_sysconfdir}/glusterfs/logger.conf.example +-%exclude %{_sysconfdir}/rsyslog.d/gluster.conf.example +-%exclude %{_prefix}/bin/glusterfind +-%exclude %{_prefix}/lib/firewalld/services/glusterfs.xml +-%exclude %{_prefix}/lib/systemd/system/glusterd.service +-%exclude %{_prefix}/lib/systemd/system/glusterfssharedstorage.service +-%exclude %{_prefix}/lib/tmpfiles.d/gluster.conf +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix-locks.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quotad.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so +-%exclude %{_libexecdir}/glusterfs/* +-%exclude %{_sbindir}/conf.py +-%exclude %{_sbindir}/gcron.py +-%exclude %{_sbindir}/gf_attach +-%exclude %{_sbindir}/gfind_missing_files +-%exclude %{_sbindir}/glfsheal +-%exclude %{_sbindir}/gluster +-%exclude %{_sbindir}/gluster-setgfid2path +-%exclude %{_sbindir}/glusterd +-%exclude %{_sbindir}/snap_scheduler.py + %if ( 0%{?_with_systemd:1} ) + %exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh + %exclude %{_datadir}/glusterfs/scripts/control-mem.sh + %endif +-%exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh +-%exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh +-%exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh +-%exclude %{_sharedstatedir}/glusterd/* + %endif + + %files api +diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am +index 6d09e37..c8dd8e3 100644 +--- a/xlators/mgmt/glusterd/src/Makefile.am ++++ b/xlators/mgmt/glusterd/src/Makefile.am +@@ -6,7 +6,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt + glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) \ + 
-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\" \ + -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" +-glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) ++glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL) + glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ + glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \ + glusterd-store.c glusterd-handshake.c glusterd-pmap.c \ +-- +1.8.3.1 + diff --git a/SOURCES/0043-inode-don-t-dump-the-whole-table-to-CLI.patch b/SOURCES/0043-inode-don-t-dump-the-whole-table-to-CLI.patch new file mode 100644 index 0000000..7e9d3c3 --- /dev/null +++ b/SOURCES/0043-inode-don-t-dump-the-whole-table-to-CLI.patch @@ -0,0 +1,137 @@ +From 416dfc70ef87400e1ddfd70e5b6e512d330b54a6 Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha +Date: Tue, 2 Apr 2019 23:25:11 +0530 +Subject: [PATCH 43/52] inode: don't dump the whole table to CLI + +dumping the whole inode table detail to screen doesn't solve any +purpose. We should be getting only toplevel details on CLI, and +then if one wants to debug further, then they need to get to +'statedump' to get full details. + +Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22347/ + +BUG: 1578703 +Change-Id: Ie7e7f5a67c1606e3c18ce21ee6df6c7e4550c211 +Signed-off-by: Sheetal Pamecha +Reviewed-on: https://code.engineering.redhat.com/gerrit/166768 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-rpc-ops.c | 23 ++++++++++++++++++++++- + libglusterfs/src/inode.c | 13 +++++++++++++ + 2 files changed, 35 insertions(+), 1 deletion(-) + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 78043cd..12e7fcc 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -7606,15 +7606,24 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix) + uint32_t active_size = 0; + uint32_t lru_size = 0; + uint32_t purge_size = 0; ++ uint32_t lru_limit = 0; + int i = 0; + + GF_ASSERT(dict); + GF_ASSERT(prefix); + ++ snprintf(key, sizeof(key), "%s.lru_limit", prefix); ++ ret = dict_get_uint32(dict, key, &lru_limit); ++ if (ret) ++ goto out; ++ cli_out("LRU limit : %u", lru_limit); ++ + snprintf(key, sizeof(key), "%s.active_size", prefix); + ret = dict_get_uint32(dict, key, &active_size); + if (ret) + goto out; ++ ++#ifdef DEBUG + if (active_size != 0) { + cli_out("Active inodes:"); + cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type"); +@@ -7626,10 +7635,16 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix) + } + cli_out(" "); + ++#else ++ cli_out("Active Inodes : %u", active_size); ++ ++#endif + snprintf(key, sizeof(key), "%s.lru_size", prefix); + ret = dict_get_uint32(dict, key, &lru_size); + if (ret) + goto out; ++ ++#ifdef DEBUG + if (lru_size != 0) { + cli_out("LRU inodes:"); + cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type"); +@@ -7640,11 +7655,15 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix) + cli_print_volume_status_inode_entry(dict, key); + } + cli_out(" "); ++#else ++ cli_out("LRU Inodes : %u", lru_size); ++#endif + + snprintf(key, sizeof(key), "%s.purge_size", prefix); + ret = dict_get_uint32(dict, key, &purge_size); + if (ret) + goto out; ++#ifdef DEBUG + if (purge_size != 0) { + cli_out("Purged inodes:"); + cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type"); +@@ -7654,7 +7673,9 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix) + snprintf(key, sizeof(key), "%s.purge%d", prefix, i); + 
cli_print_volume_status_inode_entry(dict, key);
+ }
+-
++#else
++ cli_out("Purge Inodes : %u", purge_size);
++#endif
+ out:
+ return;
+ }
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 87f74e0..96ddea5 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -2598,6 +2598,11 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ if (ret)
+ return;
+
++ snprintf(key, sizeof(key), "%s.itable.lru_limit", prefix);
++ ret = dict_set_uint32(dict, key, itable->lru_limit);
++ if (ret)
++ goto out;
++
+ snprintf(key, sizeof(key), "%s.itable.active_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->active_size);
+ if (ret)
+@@ -2613,6 +2618,13 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ if (ret)
+ goto out;
+
++#ifdef DEBUG
++ /* Dumping inode details in dictionary and sending it to CLI is not
++ required as when a developer (or support team) asks for this command
++ output, they just want to get top level detail of inode table.
++ If one wants to debug, let them take statedump and debug, this
++ wouldn't be available in CLI during production setup.
++ */
+ list_for_each_entry(inode, &itable->active, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.active%d", prefix, count++);
+@@ -2632,6 +2644,7 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ snprintf(key, sizeof(key), "%s.itable.purge%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
++#endif
+
+ out:
+ pthread_mutex_unlock(&itable->lock);
+--
+1.8.3.1
+
diff --git a/SOURCES/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch b/SOURCES/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch
new file mode 100644
index 0000000..30ab28b
--- /dev/null
+++ b/SOURCES/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch
@@ -0,0 +1,360 @@
+From bc6588890ce94101a63b861178cf38db5549d8a8 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey
+Date: Wed, 28 Nov 2018 11:22:52 +0530
+Subject: [PATCH 44/52] cluster/ec: Don't enqueue an entry if it is already
+ healing
+
+Problem:
+1 - heal-wait-qlength is by default 128. If shd is disabled
+and we need to heal files, client-side heal is needed, and
+accessing these files will trigger that heal.
+However, it has been observed that a file can be enqueued
+multiple times in the heal wait queue, which in turn fills up
+the queue and prevents other files from being enqueued.
+
+2 - While a file is going through healing and a write fop from the
+mount comes on that file, the write is sent on all the bricks,
+including the healing one. At the end it updates the version and
+size on all the bricks. However, it does not unset the dirty flag
+on all the bricks, even if this write fop was successful on all of
+them. After healing completes, this dirty flag remains set and
+never gets cleaned up if SHD is disabled.
+
+Solution:
+1 - If an entry is already in the queue or going through the heal
+process, don't enqueue the next client-side request to heal the
+same file.
+
+2 - Unset dirty on all the bricks at the end if the fop has succeeded
+on all the bricks, even if some of the bricks are going through heal.
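+
+(Illustrative sketch only, not part of the change itself: the enqueue
+guard this patch adds to ec_heal_throttle() behaves roughly as below;
+ec_is_entry_healing() and ec_set_entry_healing() are the helpers
+introduced in the diff, tracking a per-inode heal_count.)
+
+    if (!ec_is_entry_healing(fop)) {
+        /* first heal request for this inode: queue it and mark it */
+        list_add_tail(&fop->healer, &ec->heal_waiting);
+        ec->heal_waiters++;
+        ec_set_entry_healing(fop);
+    } else {
+        /* already queued or healing: release this fop, don't re-queue */
+        fop_rel = fop;
+    }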
+ +backport of : https://review.gluster.org/#/c/glusterfs/+/21744/ + +Change-Id: Ia61ffe230c6502ce6cb934425d55e2f40dd1a727 +BUG: 1600918 +Signed-off-by: Ashish Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/166296 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/ec/bug-1236065.t | 1 - + xlators/cluster/ec/src/ec-common.c | 43 +++++++++------ + xlators/cluster/ec/src/ec-common.h | 8 +++ + xlators/cluster/ec/src/ec-heal.c | 104 +++++++++++++++++++++++++++++++----- + xlators/cluster/ec/src/ec-helpers.c | 1 + + xlators/cluster/ec/src/ec-types.h | 1 + + 6 files changed, 127 insertions(+), 31 deletions(-) + +diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t +index 76d25d7..9181e73 100644 +--- a/tests/bugs/ec/bug-1236065.t ++++ b/tests/bugs/ec/bug-1236065.t +@@ -85,7 +85,6 @@ TEST pidof glusterd + EXPECT "$V0" volinfo_field $V0 'Volume Name' + EXPECT 'Started' volinfo_field $V0 'Status' + EXPECT '7' online_brick_count +- + ## cleanup + cd + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 8d65670..5183680 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -313,14 +313,15 @@ ec_check_status(ec_fop_data_t *fop) + + gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, + "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " +- "remaining=%s, good=%s, bad=%s)", ++ "remaining=%s, good=%s, bad=%s, %s)", + gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), + ec_bin(str4, sizeof(str4), fop->good, ec->nodes), + ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), +- ec->nodes)); ++ ec->nodes), ++ ec_msg_str(fop)); + if (fop->use_fd) { + if (fop->fd != NULL) { + ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, +@@ -2371,37 +2372,47 @@ ec_update_info(ec_lock_link_t *link) + uint64_t dirty[2] = {0, 0}; + uint64_t size; + ec_t *ec = NULL; ++ uintptr_t mask; + + lock = link->lock; + ctx = lock->ctx; + ec = link->fop->xl->private; + + /* pre_version[*] will be 0 if have_version is false */ +- version[0] = ctx->post_version[0] - ctx->pre_version[0]; +- version[1] = ctx->post_version[1] - ctx->pre_version[1]; ++ version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] - ++ ctx->pre_version[EC_DATA_TXN]; ++ version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] - ++ ctx->pre_version[EC_METADATA_TXN]; + + size = ctx->post_size - ctx->pre_size; + /* If we set the dirty flag for update fop, we have to unset it. + * If fop has failed on some bricks, leave the dirty as marked. 
*/ ++ + if (lock->unlock_now) { ++ if (version[EC_DATA_TXN]) { ++ /*A data fop will have difference in post and pre version ++ *and for data fop we send writes on healing bricks also */ ++ mask = lock->good_mask | lock->healing; ++ } else { ++ mask = lock->good_mask; ++ } + /* Ensure that nodes are up while doing final + * metadata update.*/ +- if (!(ec->node_mask & ~lock->good_mask) && +- !(ec->node_mask & ~ec->xl_up)) { +- if (ctx->dirty[0] != 0) { +- dirty[0] = -1; ++ if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) { ++ if (ctx->dirty[EC_DATA_TXN] != 0) { ++ dirty[EC_DATA_TXN] = -1; + } +- if (ctx->dirty[1] != 0) { +- dirty[1] = -1; ++ if (ctx->dirty[EC_METADATA_TXN] != 0) { ++ dirty[EC_METADATA_TXN] = -1; + } + /*If everything is fine and we already + *have version xattr set on entry, there + *is no need to update version again*/ +- if (ctx->pre_version[0]) { +- version[0] = 0; ++ if (ctx->pre_version[EC_DATA_TXN]) { ++ version[EC_DATA_TXN] = 0; + } +- if (ctx->pre_version[1]) { +- version[1] = 0; ++ if (ctx->pre_version[EC_METADATA_TXN]) { ++ version[EC_METADATA_TXN] = 0; + } + } else { + link->optimistic_changelog = _gf_false; +@@ -2410,8 +2421,8 @@ ec_update_info(ec_lock_link_t *link) + memset(ctx->dirty, 0, sizeof(ctx->dirty)); + } + +- if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) || +- (dirty[1] != 0)) { ++ if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) || ++ (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) { + ec_update_size_version(link, version, size, dirty); + return _gf_true; + } +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index 115e147..54aaa77 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -190,4 +190,12 @@ ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, + void + ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index, + int32_t ret_status); ++gf_boolean_t ++ec_is_entry_healing(ec_fop_data_t *fop); ++void ++ec_set_entry_healing(ec_fop_data_t *fop); ++void ++ec_reset_entry_healing(ec_fop_data_t *fop); ++char * ++ec_msg_str(ec_fop_data_t *fop); + #endif /* __EC_COMMON_H__ */ +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index eaf80e0..1ca12c1 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -103,6 +103,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata) + } + /* FOP: heal */ + ++void ++ec_set_entry_healing(ec_fop_data_t *fop) ++{ ++ ec_inode_t *ctx = NULL; ++ loc_t *loc = NULL; ++ ++ if (!fop) ++ return; ++ ++ loc = &fop->loc[0]; ++ LOCK(&loc->inode->lock); ++ { ++ ctx = __ec_inode_get(loc->inode, fop->xl); ++ if (ctx) { ++ ctx->heal_count += 1; ++ } ++ } ++ UNLOCK(&loc->inode->lock); ++} ++ ++void ++ec_reset_entry_healing(ec_fop_data_t *fop) ++{ ++ ec_inode_t *ctx = NULL; ++ loc_t *loc = NULL; ++ int32_t heal_count = 0; ++ if (!fop) ++ return; ++ ++ loc = &fop->loc[0]; ++ LOCK(&loc->inode->lock); ++ { ++ ctx = __ec_inode_get(loc->inode, fop->xl); ++ if (ctx) { ++ ctx->heal_count += -1; ++ heal_count = ctx->heal_count; ++ } ++ } ++ UNLOCK(&loc->inode->lock); ++ GF_ASSERT(heal_count >= 0); ++} ++ + uintptr_t + ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood) + { +@@ -2507,17 +2549,6 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); + goto out; + } +- +- msources = alloca0(ec->nodes); +- mhealed_sinks = 
alloca0(ec->nodes); +- ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks); +- if (ret == 0) { +- mgood = ec_char_array_to_mask(msources, ec->nodes); +- mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes); +- } else { +- op_ret = -1; +- op_errno = -ret; +- } + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + if (IA_ISREG(loc->inode->ia_type)) { +@@ -2538,8 +2569,19 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + op_ret = -1; + op_errno = -ret; + } ++ msources = alloca0(ec->nodes); ++ mhealed_sinks = alloca0(ec->nodes); ++ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks); ++ if (ret == 0) { ++ mgood = ec_char_array_to_mask(msources, ec->nodes); ++ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes); ++ } else { ++ op_ret = -1; ++ op_errno = -ret; ++ } + + out: ++ ec_reset_entry_healing(fop); + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, + ec_char_array_to_mask(participants, ec->nodes), +@@ -2650,11 +2692,33 @@ ec_handle_healers_done(ec_fop_data_t *fop) + ec_launch_heal(ec, heal_fop); + } + ++gf_boolean_t ++ec_is_entry_healing(ec_fop_data_t *fop) ++{ ++ ec_inode_t *ctx = NULL; ++ int32_t heal_count = 0; ++ loc_t *loc = NULL; ++ ++ loc = &fop->loc[0]; ++ ++ LOCK(&loc->inode->lock); ++ { ++ ctx = __ec_inode_get(loc->inode, fop->xl); ++ if (ctx) { ++ heal_count = ctx->heal_count; ++ } ++ } ++ UNLOCK(&loc->inode->lock); ++ GF_ASSERT(heal_count >= 0); ++ return heal_count; ++} ++ + void + ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + { + gf_boolean_t can_heal = _gf_true; + ec_t *ec = this->private; ++ ec_fop_data_t *fop_rel = NULL; + + if (fop->req_frame == NULL) { + LOCK(&ec->lock); +@@ -2662,8 +2726,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + if ((ec->background_heals > 0) && + (ec->heal_wait_qlen + ec->background_heals) > + (ec->heal_waiters + ec->healers)) { +- list_add_tail(&fop->healer, &ec->heal_waiting); +- ec->heal_waiters++; ++ if (!ec_is_entry_healing(fop)) { ++ list_add_tail(&fop->healer, &ec->heal_waiting); ++ ec->heal_waiters++; ++ ec_set_entry_healing(fop); ++ } else { ++ fop_rel = fop; ++ } + fop = __ec_dequeue_heals(ec); + } else { + can_heal = _gf_false; +@@ -2673,8 +2742,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + } + + if (can_heal) { +- if (fop) ++ if (fop) { ++ if (fop->req_frame != NULL) { ++ ec_set_entry_healing(fop); ++ } + ec_launch_heal(ec, fop); ++ } + } else { + gf_msg_debug(this->name, 0, + "Max number of heals are " +@@ -2682,6 +2755,9 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + ec_fop_set_error(fop, EBUSY); + ec_heal_fail(ec, fop); + } ++ if (fop_rel) { ++ ec_heal_done(0, NULL, fop_rel); ++ } + } + + void +diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c +index e6b0359..43f6e3b 100644 +--- a/xlators/cluster/ec/src/ec-helpers.c ++++ b/xlators/cluster/ec/src/ec-helpers.c +@@ -717,6 +717,7 @@ __ec_inode_get(inode_t *inode, xlator_t *xl) + memset(ctx, 0, sizeof(*ctx)); + INIT_LIST_HEAD(&ctx->heal); + INIT_LIST_HEAD(&ctx->stripe_cache.lru); ++ ctx->heal_count = 0; + value = (uint64_t)(uintptr_t)ctx; + if (__inode_ctx_set(inode, xl, &value) != 0) { + GF_FREE(ctx); +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index f3d63ca..6ae4a2b 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -171,6 +171,7 @@ struct _ec_inode { + gf_boolean_t have_config; + gf_boolean_t 
have_version; + gf_boolean_t have_size; ++ int32_t heal_count; + ec_config_t config; + uint64_t pre_version[2]; + uint64_t post_version[2]; +-- +1.8.3.1 + diff --git a/SOURCES/0045-glusterd-fix-txn-id-mem-leak.patch b/SOURCES/0045-glusterd-fix-txn-id-mem-leak.patch new file mode 100644 index 0000000..b9b2b3e --- /dev/null +++ b/SOURCES/0045-glusterd-fix-txn-id-mem-leak.patch @@ -0,0 +1,126 @@ +From 6c004c6c8b8f98f56e186740881520b8364e6f85 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Mon, 18 Mar 2019 16:08:04 +0530 +Subject: [PATCH 45/52] glusterd: fix txn-id mem leak + +This commit ensures the following: +1. Don't send commit op request to the remote nodes when gluster v +status all is executed as for the status all transaction the local +commit gets the name of the volumes and remote commit ops are +technically a no-op. So no need for additional rpc requests. +2. In op state machine flow, if the transaction is in staged state and +op_info.skip_locking is true, then no need to set the txn id in the +priv->glusterd_txn_opinfo dictionary which never gets freed. + +> Fixes: bz#1691164 +> Change-Id: Ib6a9300ea29633f501abac2ba53fb72ff648c822 +> Signed-off-by: Atin Mukherjee + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22388/ + +BUG: 1670415 +Change-Id: Ib6a9300ea29633f501abac2ba53fb72ff648c822 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/166449 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 26 ++++++++++++++++++++------ + xlators/mgmt/glusterd/src/glusterd-syncop.c | 16 ++++++++++++++++ + 2 files changed, 36 insertions(+), 6 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index cbbb5d9..12d857a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -5652,6 +5652,9 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx) + dict_t *dict = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; ++ glusterd_op_info_t txn_op_info = { ++ {0}, ++ }; + + this = THIS; + GF_ASSERT(this); +@@ -5686,6 +5689,7 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx) + ret = -1; + goto out; + } ++ ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info); + + ret = dict_set_bin(rsp_dict, "transaction_id", txn_id, sizeof(*txn_id)); + if (ret) { +@@ -5704,6 +5708,12 @@ out: + + gf_msg_debug(this->name, 0, "Returning with %d", ret); + ++ /* for no volname transactions, the txn_opinfo needs to be cleaned up ++ * as there's no unlock event triggered ++ */ ++ if (txn_op_info.skip_locking) ++ ret = glusterd_clear_txn_opinfo(txn_id); ++ + if (rsp_dict) + dict_unref(rsp_dict); + +@@ -8159,12 +8169,16 @@ glusterd_op_sm() + "Unable to clear " + "transaction's opinfo"); + } else { +- ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, 0, +- GD_MSG_TRANS_OPINFO_SET_FAIL, +- "Unable to set " +- "transaction's opinfo"); ++ if (!(event_type == GD_OP_EVENT_STAGE_OP && ++ opinfo.state.state == GD_OP_STATE_STAGED && ++ opinfo.skip_locking)) { ++ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_TRANS_OPINFO_SET_FAIL, ++ "Unable to set " ++ "transaction's opinfo"); ++ } + } + + glusterd_destroy_op_event_ctx(event); +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c 
b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 1741cf8..618d8bc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1392,6 +1392,8 @@ gd_commit_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char *errstr = NULL; + struct syncargs args = {0}; + int type = GF_QUOTA_OPTION_TYPE_NONE; ++ uint32_t cmd = 0; ++ gf_boolean_t origin_glusterd = _gf_false; + + this = THIS; + GF_ASSERT(this); +@@ -1449,6 +1451,20 @@ commit_done: + gd_syncargs_init(&args, op_ctx); + synctask_barrier_init((&args)); + peer_cnt = 0; ++ origin_glusterd = is_origin_glusterd(req_dict); ++ ++ if (op == GD_OP_STATUS_VOLUME) { ++ ret = dict_get_uint32(req_dict, "cmd", &cmd); ++ if (ret) ++ goto out; ++ ++ if (origin_glusterd) { ++ if ((cmd & GF_CLI_STATUS_ALL)) { ++ ret = 0; ++ goto out; ++ } ++ } ++ } + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) +-- +1.8.3.1 + diff --git a/SOURCES/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch b/SOURCES/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch new file mode 100644 index 0000000..5365515 --- /dev/null +++ b/SOURCES/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch @@ -0,0 +1,98 @@ +From a0661449cd8ba7b851fec473191733767f4541b8 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Thu, 28 Mar 2019 17:55:54 +0530 +Subject: [PATCH 46/52] protocol/client: Do not fallback to anon-fd if fd is + not open + +If an open comes on a file when a brick is down and after the brick comes up, +a fop comes on the fd, client xlator would still wind the fop on anon-fd +leading to wrong behavior of the fops in some cases. + +Example: +If lk fop is issued on the fd just after the brick is up in the scenario above, +lk fop will be sent on anon-fd instead of failing it on that client xlator. +This lock will never be freed upon close of the fd as flush on anon-fd is +invalid and is not wound below server xlator. + +As a fix, failing the fop unless the fd has FALLBACK_TO_ANON_FD flag. + + >Upstream-patch: https://review.gluster.org/c/glusterfs/+/15804 + >Change-Id: I77692d056660b2858e323bdabdfe0a381807cccc + >fixes bz#1390914 + +BUG: 1695057 +Change-Id: Id656bea8dde14327212fbe7ecc97519dc5b32098 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/166833 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/protocol/bug-1390914.t | 36 ++++++++++++++++++++++++++++ + xlators/protocol/client/src/client-helpers.c | 8 ++++++- + 2 files changed, 43 insertions(+), 1 deletion(-) + create mode 100644 tests/bugs/protocol/bug-1390914.t + +diff --git a/tests/bugs/protocol/bug-1390914.t b/tests/bugs/protocol/bug-1390914.t +new file mode 100644 +index 0000000..e3dab92 +--- /dev/null ++++ b/tests/bugs/protocol/bug-1390914.t +@@ -0,0 +1,36 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../fileio.rc
++cleanup;
++
++#test that fops are not wound on anon-fd when fd is not open on that brick
++TEST glusterd;
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume start $V0
++TEST $CLI volume profile $V0 start
++TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
++
++TEST touch $M0/1
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST fd_open 200 'w' "$M0/1"
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++
++#lk should only happen on 2 bricks, if there is a bug, it will plant a lock
++#with anon-fd on first-brick which will never be released because flush won't
++#be wound below server xlator for anon-fd
++TEST flock -x -n 200
++TEST fd_close 200
++
++TEST fd_open 200 'w' "$M0/1"
++#this lock will fail if there is a stale lock
++TEST flock -x -n 200
++TEST fd_close 200
++cleanup;
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index 55e87b3..2dd7106 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -419,7 +419,13 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+ {
+ fdctx = this_fd_get_ctx(fd, this);
+ if (!fdctx) {
+- *remote_fd = GF_ANON_FD_NO;
++ if (fd->anonymous) {
++ *remote_fd = GF_ANON_FD_NO;
++ } else {
++ *remote_fd = -1;
++ gf_msg_debug(this->name, EBADF, "not a valid fd for gfid: %s",
++ uuid_utoa(fd->inode->gfid));
++ }
+ } else {
+ if (__is_fd_reopen_in_progress(fdctx))
+ *remote_fd = -1;
+--
+1.8.3.1
+
diff --git a/SOURCES/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch b/SOURCES/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch
new file mode 100644
index 0000000..cc71864
--- /dev/null
+++ b/SOURCES/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch
@@ -0,0 +1,1652 @@
+From 83d816370f7540d4065baac704df65c648a03125 Mon Sep 17 00:00:00 2001
+From: Poornima G
+Date: Sun, 24 Mar 2019 09:40:50 +0530
+Subject: [PATCH 47/52] client-rpc: Fix the payload being sent on the wire
+
+The fops allocate 3 kinds of payload (buffer) in the client xlator:
+- fop payload, this is the buffer allocated by the write and put fops
+- rsphdr payload, this is the buffer required by the reply cbk of
+  some fops like lookup, readdir.
+- rsp_payload, this is the buffer required by the reply cbk of fops like
+  readv etc.
+
+Currently, in the lookup and readdir fops the rsphdr is sent as payload,
+hence the allocated rsphdr buffer is also sent on the wire, increasing
+the bandwidth consumption on the wire.
+
+With this patch, the issue is fixed.
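+
+(Illustrative sketch only, taken from the lookup path in the diff
+below: callers now describe their buffers in a client_payload_t and
+hand that to client_submit_request(), instead of passing rsphdr as the
+wire payload.)
+
+    client_payload_t cp;
+
+    memset(&cp, 0, sizeof(client_payload_t));
+    cp.rsphdr = rsphdr;            /* reply-header buffer, used locally only */
+    cp.rsphdr_cnt = count;
+    cp.rsp_iobref = local->iobref;
+    ret = client_submit_request(this, &req, frame, conf->fops,
+                                GFS3_OP_LOOKUP, client3_3_lookup_cbk, &cp,
+                                (xdrproc_t)xdr_gfs3_lookup_req);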
+ +>Fixes: bz#1692093 +>Change-Id: Ie8158921f4db319e60ad5f52d851fa5c9d4a269b +>Signed-off-by: Poornima G +>Backport-of: https://review.gluster.org/22402/ + +BUG: 1693935 +Change-Id: Id12746a4c9416288bc1387c8b018bbe9cc4b637d +Signed-off-by: Poornima G +Reviewed-on: https://code.engineering.redhat.com/gerrit/166535 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/protocol/client/src/client-handshake.c | 29 ++- + xlators/protocol/client/src/client-helpers.c | 14 +- + xlators/protocol/client/src/client-rpc-fops.c | 235 ++++++++++++---------- + xlators/protocol/client/src/client-rpc-fops_v2.c | 236 ++++++++++++----------- + xlators/protocol/client/src/client.c | 22 ++- + xlators/protocol/client/src/client.h | 16 +- + 6 files changed, 308 insertions(+), 244 deletions(-) + +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index f9631c5..c43756a 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -34,7 +34,6 @@ typedef struct client_fd_lk_local { + clnt_fd_ctx_t *fdctx; + } clnt_fd_lk_local_t; + +- + int32_t + client3_getspec(call_frame_t *frame, xlator_t *this, void *data) + { +@@ -201,8 +200,8 @@ clnt_release_reopen_fd(xlator_t *this, clnt_fd_ctx_t *fdctx) + req.fd = fdctx->remote_fd; + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RELEASE, +- clnt_release_reopen_fd_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfs3_releasedir_req); ++ clnt_release_reopen_fd_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_releasedir_req); + out: + if (ret) { + clnt_fd_lk_reacquire_failed(this, fdctx, conf); +@@ -486,8 +485,8 @@ protocol_client_reopendir(clnt_fd_ctx_t *fdctx, xlator_t *this) + frame->local = local; + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR, +- client3_3_reopendir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_opendir_req); ++ client3_3_reopendir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_opendir_req); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED, + "failed to send the re-opendir request"); +@@ -547,8 +546,8 @@ protocol_client_reopenfile(clnt_fd_ctx_t *fdctx, xlator_t *this) + local->loc.path); + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN, +- client3_3_reopen_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_open_req); ++ client3_3_reopen_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_open_req); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED, + "failed to send the re-open request"); +@@ -745,8 +744,8 @@ protocol_client_reopendir_v2(clnt_fd_ctx_t *fdctx, xlator_t *this) + frame->local = local; + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR, +- client4_0_reopendir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_opendir_req); ++ client4_0_reopendir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_opendir_req); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED, + "failed to send the re-opendir request"); +@@ -806,8 +805,8 @@ protocol_client_reopenfile_v2(clnt_fd_ctx_t *fdctx, xlator_t *this) + local->loc.path); + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN, +- client4_0_reopen_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_open_req); ++ client4_0_reopen_cbk, NULL, ++ (xdrproc_t)xdr_gfx_open_req); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED, + "failed to send the re-open request"); +@@ 
-1312,7 +1311,6 @@ client_setvolume(xlator_t *this, struct rpc_clnt *rpc) + + ret = client_submit_request(this, &req, fr, conf->handshake, + GF_HNDSK_SETVOLUME, client_setvolume_cbk, NULL, +- NULL, 0, NULL, 0, NULL, + (xdrproc_t)xdr_gf_setvolume_req); + + fail: +@@ -1522,8 +1520,7 @@ client_query_portmap(xlator_t *this, struct rpc_clnt *rpc) + + ret = client_submit_request(this, &req, fr, &clnt_pmap_prog, + GF_PMAP_PORTBYBRICK, client_query_portmap_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_pmap_port_by_brick_req); ++ NULL, (xdrproc_t)xdr_pmap_port_by_brick_req); + + fail: + return ret; +@@ -1624,8 +1621,8 @@ client_handshake(xlator_t *this, struct rpc_clnt *rpc) + + req.gfs_id = 0xbabe; + ret = client_submit_request(this, &req, frame, conf->dump, GF_DUMP_DUMP, +- client_dump_version_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gf_dump_req); ++ client_dump_version_cbk, NULL, ++ (xdrproc_t)xdr_gf_dump_req); + + out: + return ret; +diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c +index 2dd7106..53b4484 100644 +--- a/xlators/protocol/client/src/client-helpers.c ++++ b/xlators/protocol/client/src/client-helpers.c +@@ -3082,8 +3082,7 @@ send_release4_0_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx, + gf_msg_trace(this->name, 0, "sending releasedir on fd"); + (void)client_submit_request( + this, &req, fr, conf->fops, GFS3_OP_RELEASEDIR, +- client4_0_releasedir_cbk, NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_releasedir_req); ++ client4_0_releasedir_cbk, NULL, (xdrproc_t)xdr_gfx_releasedir_req); + } else { + gfx_release_req req = { + { +@@ -3094,8 +3093,8 @@ send_release4_0_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx, + req.fd = fdctx->remote_fd; + gf_msg_trace(this->name, 0, "sending release on fd"); + (void)client_submit_request(this, &req, fr, conf->fops, GFS3_OP_RELEASE, +- client4_0_release_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfx_release_req); ++ client4_0_release_cbk, NULL, ++ (xdrproc_t)xdr_gfx_release_req); + } + + return 0; +@@ -3118,8 +3117,7 @@ send_release3_3_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx, + gf_msg_trace(this->name, 0, "sending releasedir on fd"); + (void)client_submit_request( + this, &req, fr, conf->fops, GFS3_OP_RELEASEDIR, +- client3_3_releasedir_cbk, NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_releasedir_req); ++ client3_3_releasedir_cbk, NULL, (xdrproc_t)xdr_gfs3_releasedir_req); + } else { + gfs3_release_req req = { + { +@@ -3130,8 +3128,8 @@ send_release3_3_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx, + req.fd = fdctx->remote_fd; + gf_msg_trace(this->name, 0, "sending release on fd"); + (void)client_submit_request(this, &req, fr, conf->fops, GFS3_OP_RELEASE, +- client3_3_release_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfs3_release_req); ++ client3_3_release_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_release_req); + } + + return 0; +diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c +index b7df7cc..1c8b31b 100644 +--- a/xlators/protocol/client/src/client-rpc-fops.c ++++ b/xlators/protocol/client/src/client-rpc-fops.c +@@ -3234,11 +3234,13 @@ client3_3_lookup(call_frame_t *frame, xlator_t *this, void *data) + struct iobref *rsp_iobref = NULL; + struct iobuf *rsp_iobuf = NULL; + struct iovec *rsphdr = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + + memset(vector, 0, sizeof(vector)); ++ memset(&cp, 0, sizeof(client_payload_t)); + + conf = this->private; 
+ args = data; +@@ -3288,9 +3290,12 @@ client3_3_lookup(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LOOKUP, +- client3_3_lookup_cbk, NULL, rsphdr, count, NULL, +- 0, local->iobref, ++ client3_3_lookup_cbk, &cp, + (xdrproc_t)xdr_gfs3_lookup_req); + + if (ret) { +@@ -3338,8 +3343,8 @@ client3_3_stat(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STAT, +- client3_3_stat_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_stat_req); ++ client3_3_stat_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_stat_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3381,8 +3386,8 @@ client3_3_truncate(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_TRUNCATE, +- client3_3_truncate_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_truncate_req); ++ client3_3_truncate_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_truncate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3425,8 +3430,7 @@ client3_3_ftruncate(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FTRUNCATE, client3_3_ftruncate_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_ftruncate_req); ++ NULL, (xdrproc_t)xdr_gfs3_ftruncate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3468,8 +3472,8 @@ client3_3_access(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ACCESS, +- client3_3_access_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_access_req); ++ client3_3_access_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_access_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3505,10 +3509,12 @@ client3_3_readlink(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + + conf = this->private; +@@ -3547,9 +3553,11 @@ client3_3_readlink(call_frame_t *frame, xlator_t *this, void *data) + rsp_iobuf = NULL; + rsp_iobref = NULL; + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READLINK, +- client3_3_readlink_cbk, NULL, rsphdr, count, +- NULL, 0, local->iobref, ++ client3_3_readlink_cbk, &cp, + (xdrproc_t)xdr_gfs3_readlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -3595,8 +3603,8 @@ client3_3_unlink(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_UNLINK, +- client3_3_unlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_unlink_req); ++ client3_3_unlink_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_unlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3638,8 +3646,8 @@ client3_3_rmdir(call_frame_t 
*frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RMDIR, +- client3_3_rmdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_rmdir_req); ++ client3_3_rmdir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_rmdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3697,8 +3705,8 @@ client3_3_symlink(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SYMLINK, +- client3_3_symlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_symlink_req); ++ client3_3_symlink_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_symlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3743,8 +3751,8 @@ client3_3_rename(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RENAME, +- client3_3_rename_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_rename_req); ++ client3_3_rename_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_rename_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3802,8 +3810,8 @@ client3_3_link(call_frame_t *frame, xlator_t *this, void *data) + loc_path(&local->loc2, NULL); + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LINK, +- client3_3_link_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_link_req); ++ client3_3_link_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_link_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3857,8 +3865,8 @@ client3_3_mknod(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKNOD, +- client3_3_mknod_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_mknod_req); ++ client3_3_mknod_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_mknod_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3924,8 +3932,8 @@ client3_3_mkdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKDIR, +- client3_3_mkdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_mkdir_req); ++ client3_3_mkdir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_mkdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3982,8 +3990,8 @@ client3_3_create(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_CREATE, +- client3_3_create_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_create_req); ++ client3_3_create_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_create_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4043,8 +4051,8 @@ client3_3_open(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN, +- client3_3_open_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_open_req); ++ client3_3_open_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_open_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4079,10 +4087,12 @@ 
client3_3_readv(call_frame_t *frame, xlator_t *this, void *data) + }; + struct iobuf *rsp_iobuf = NULL; + struct iobref *rsp_iobref = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -4130,9 +4140,12 @@ client3_3_readv(call_frame_t *frame, xlator_t *this, void *data) + local->iobref = rsp_iobref; + rsp_iobref = NULL; + ++ cp.rsp_payload = &rsp_vec; ++ cp.rsp_payload_cnt = 1; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READ, +- client3_3_readv_cbk, NULL, NULL, 0, &rsp_vec, 1, +- local->iobref, (xdrproc_t)xdr_gfs3_read_req); ++ client3_3_readv_cbk, &cp, ++ (xdrproc_t)xdr_gfs3_read_req); + if (ret) { + // unwind is done in the cbk + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4167,10 +4180,12 @@ client3_3_writev(call_frame_t *frame, xlator_t *this, void *data) + }; + int op_errno = ESTALE; + int ret = 0; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -4187,9 +4202,12 @@ client3_3_writev(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.iobref = args->iobref; ++ cp.payload = args->vector; ++ cp.payload_cnt = args->count; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_WRITE, +- client3_3_writev_cbk, args->iobref, +- args->vector, args->count, NULL, 0, NULL, ++ client3_3_writev_cbk, &cp, + (xdrproc_t)xdr_gfs3_write_req); + if (ret) { + /* +@@ -4248,8 +4266,8 @@ client3_3_flush(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH, +- client3_3_flush_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_flush_req); ++ client3_3_flush_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_flush_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4291,8 +4309,8 @@ client3_3_fsync(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNC, +- client3_3_fsync_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fsync_req); ++ client3_3_fsync_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fsync_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4336,8 +4354,8 @@ client3_3_fstat(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSTAT, +- client3_3_fstat_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fstat_req); ++ client3_3_fstat_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fstat_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4391,8 +4409,8 @@ client3_3_opendir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR, +- client3_3_opendir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_opendir_req); ++ client3_3_opendir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_opendir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4435,8 +4453,8 @@ client3_3_fsyncdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = 
client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNCDIR, +- client3_3_fsyncdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fsyncdir_req); ++ client3_3_fsyncdir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fsyncdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4479,8 +4497,8 @@ client3_3_statfs(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STATFS, +- client3_3_statfs_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_statfs_req); ++ client3_3_statfs_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_statfs_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4523,8 +4541,8 @@ client3_3_setxattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETXATTR, +- client3_3_setxattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_setxattr_req); ++ client3_3_setxattr_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_setxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4570,8 +4588,7 @@ client3_3_fsetxattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FSETXATTR, client3_3_fsetxattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_fsetxattr_req); ++ NULL, (xdrproc_t)xdr_gfs3_fsetxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4611,10 +4628,12 @@ client3_3_fgetxattr(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -4654,9 +4673,12 @@ client3_3_fgetxattr(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, +- GFS3_OP_FGETXATTR, client3_3_fgetxattr_cbk, +- NULL, rsphdr, count, NULL, 0, local->iobref, ++ GFS3_OP_FGETXATTR, client3_3_fgetxattr_cbk, &cp, + (xdrproc_t)xdr_gfs3_fgetxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4699,11 +4721,14 @@ client3_3_getxattr(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) { + op_errno = 0; + goto unwind; + } ++ ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + + local = mem_get0(this->local_pool); +@@ -4775,9 +4800,12 @@ client3_3_getxattr(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_GETXATTR, +- client3_3_getxattr_cbk, NULL, rsphdr, count, +- NULL, 0, local->iobref, ++ client3_3_getxattr_cbk, &cp, + (xdrproc_t)xdr_gfs3_getxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4822,10 +4850,12 @@ client3_3_xattrop(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if 
(!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + + if (!(args->loc && args->loc->inode)) +@@ -4871,9 +4901,12 @@ client3_3_xattrop(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_XATTROP, +- client3_3_xattrop_cbk, NULL, rsphdr, count, +- NULL, 0, local->iobref, ++ client3_3_xattrop_cbk, &cp, + (xdrproc_t)xdr_gfs3_xattrop_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4918,10 +4951,12 @@ client3_3_fxattrop(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -4962,9 +4997,11 @@ client3_3_fxattrop(call_frame_t *frame, xlator_t *this, void *data) + rsp_iobuf = NULL; + rsp_iobref = NULL; + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FXATTROP, +- client3_3_fxattrop_cbk, NULL, rsphdr, count, +- NULL, 0, local->iobref, ++ client3_3_fxattrop_cbk, &cp, + (xdrproc_t)xdr_gfs3_fxattrop_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5016,8 +5053,7 @@ client3_3_removexattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_REMOVEXATTR, client3_3_removexattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_removexattr_req); ++ NULL, (xdrproc_t)xdr_gfs3_removexattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5059,10 +5095,9 @@ client3_3_fremovexattr(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } +- ret = client_submit_request(this, &req, frame, conf->fops, +- GFS3_OP_FREMOVEXATTR, +- client3_3_fremovexattr_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfs3_fremovexattr_req); ++ ret = client_submit_request( ++ this, &req, frame, conf->fops, GFS3_OP_FREMOVEXATTR, ++ client3_3_fremovexattr_cbk, NULL, (xdrproc_t)xdr_gfs3_fremovexattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5104,8 +5139,8 @@ client3_3_lease(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LEASE, +- client3_3_lease_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_lease_req); ++ client3_3_lease_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_lease_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5167,7 +5202,7 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK, +- client3_3_lk_cbk, NULL, NULL, 0, NULL, 0, NULL, ++ client3_3_lk_cbk, NULL, + (xdrproc_t)xdr_gfs3_lk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5210,8 +5245,8 @@ client3_3_inodelk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_INODELK, +- client3_3_inodelk_cbk, NULL, NULL, 0, NULL, 0, +- 
NULL, (xdrproc_t)xdr_gfs3_inodelk_req); ++ client3_3_inodelk_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_inodelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5260,8 +5295,8 @@ client3_3_finodelk(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FINODELK, +- client3_3_finodelk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_finodelk_req); ++ client3_3_finodelk_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_finodelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5305,8 +5340,8 @@ client3_3_entrylk(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ENTRYLK, +- client3_3_entrylk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_entrylk_req); ++ client3_3_entrylk_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_entrylk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5349,8 +5384,8 @@ client3_3_fentrylk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FENTRYLK, +- client3_3_fentrylk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fentrylk_req); ++ client3_3_fentrylk_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fentrylk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5391,8 +5426,7 @@ client3_3_rchecksum(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_RCHECKSUM, client3_3_rchecksum_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_rchecksum_req); ++ NULL, (xdrproc_t)xdr_gfs3_rchecksum_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5433,10 +5467,12 @@ client3_3_readdir(call_frame_t *frame, xlator_t *this, void *data) + {0}, + }; + int readdir_rsp_size = 0; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5486,9 +5522,11 @@ client3_3_readdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = rsp_iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIR, +- client3_3_readdir_cbk, NULL, rsphdr, count, +- NULL, 0, rsp_iobref, ++ client3_3_readdir_cbk, &cp, + (xdrproc_t)xdr_gfs3_readdir_req); + + if (ret) { +@@ -5534,10 +5572,12 @@ client3_3_readdirp(call_frame_t *frame, xlator_t *this, void *data) + {0}, + }; + clnt_local_t *local = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5587,9 +5627,11 @@ client3_3_readdirp(call_frame_t *frame, xlator_t *this, void *data) + + local->fd = fd_ref(args->fd); + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = rsp_iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIRP, +- client3_3_readdirp_cbk, NULL, rsphdr, count, +- NULL, 0, rsp_iobref, ++ client3_3_readdirp_cbk, &cp, + (xdrproc_t)xdr_gfs3_readdirp_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5637,8 +5679,8 @@ 
client3_3_setattr(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETATTR, +- client3_3_setattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_setattr_req); ++ client3_3_setattr_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_setattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5679,8 +5721,8 @@ client3_3_fsetattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSETATTR, +- client3_3_fsetattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fsetattr_req); ++ client3_3_fsetattr_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fsetattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5722,8 +5764,7 @@ client3_3_fallocate(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FALLOCATE, client3_3_fallocate_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_fallocate_req); ++ NULL, (xdrproc_t)xdr_gfs3_fallocate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5764,8 +5805,8 @@ client3_3_discard(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_DISCARD, +- client3_3_discard_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_discard_req); ++ client3_3_discard_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_discard_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5807,8 +5848,8 @@ client3_3_zerofill(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ZEROFILL, +- client3_3_zerofill_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_zerofill_req); ++ client3_3_zerofill_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_zerofill_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5850,7 +5891,7 @@ client3_3_ipc(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_IPC, +- client3_3_ipc_cbk, NULL, NULL, 0, NULL, 0, NULL, ++ client3_3_ipc_cbk, NULL, + (xdrproc_t)xdr_gfs3_ipc_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5895,8 +5936,8 @@ client3_3_seek(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SEEK, +- client3_3_seek_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_seek_req); ++ client3_3_seek_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_seek_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -6083,8 +6124,7 @@ client3_3_getactivelk(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_GETACTIVELK, client3_3_getactivelk_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_getactivelk_req); ++ NULL, (xdrproc_t)xdr_gfs3_getactivelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -6141,8 +6181,7 @@ client3_3_setactivelk(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, 
conf->fops, + GFS3_OP_SETACTIVELK, client3_3_setactivelk_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_setactivelk_req); ++ NULL, (xdrproc_t)xdr_gfs3_setactivelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c +index 8f3ee41..2673b6e 100644 +--- a/xlators/protocol/client/src/client-rpc-fops_v2.c ++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c +@@ -3005,11 +3005,13 @@ client4_0_lookup(call_frame_t *frame, xlator_t *this, void *data) + struct iobref *rsp_iobref = NULL; + struct iobuf *rsp_iobuf = NULL; + struct iovec *rsphdr = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + + memset(vector, 0, sizeof(vector)); ++ memset(&cp, 0, sizeof(client_payload_t)); + + conf = this->private; + args = data; +@@ -3059,9 +3061,12 @@ client4_0_lookup(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LOOKUP, +- client4_0_lookup_cbk, NULL, rsphdr, count, NULL, +- 0, local->iobref, ++ client4_0_lookup_cbk, &cp, + (xdrproc_t)xdr_gfx_lookup_req); + + if (ret) { +@@ -3109,8 +3114,8 @@ client4_0_stat(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STAT, +- client4_0_stat_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_stat_req); ++ client4_0_stat_cbk, NULL, ++ (xdrproc_t)xdr_gfx_stat_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3153,8 +3158,8 @@ client4_0_truncate(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_TRUNCATE, +- client4_0_truncate_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_truncate_req); ++ client4_0_truncate_cbk, NULL, ++ (xdrproc_t)xdr_gfx_truncate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3198,8 +3203,7 @@ client4_0_ftruncate(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FTRUNCATE, client4_0_ftruncate_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_ftruncate_req); ++ NULL, (xdrproc_t)xdr_gfx_ftruncate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3241,8 +3245,8 @@ client4_0_access(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ACCESS, +- client4_0_access_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_access_req); ++ client4_0_access_cbk, NULL, ++ (xdrproc_t)xdr_gfx_access_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3295,8 +3299,8 @@ client4_0_readlink(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READLINK, +- client4_0_readlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_readlink_req); ++ client4_0_readlink_cbk, NULL, ++ (xdrproc_t)xdr_gfx_readlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, 
PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3338,8 +3342,8 @@ client4_0_unlink(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_UNLINK, +- client4_0_unlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_unlink_req); ++ client4_0_unlink_cbk, NULL, ++ (xdrproc_t)xdr_gfx_unlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3381,8 +3385,8 @@ client4_0_rmdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RMDIR, +- client4_0_rmdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_rmdir_req); ++ client4_0_rmdir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_rmdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3440,8 +3444,8 @@ client4_0_symlink(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SYMLINK, +- client4_0_symlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_symlink_req); ++ client4_0_symlink_cbk, NULL, ++ (xdrproc_t)xdr_gfx_symlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3486,8 +3490,8 @@ client4_0_rename(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RENAME, +- client4_0_rename_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_rename_req); ++ client4_0_rename_cbk, NULL, ++ (xdrproc_t)xdr_gfx_rename_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3546,8 +3550,8 @@ client4_0_link(call_frame_t *frame, xlator_t *this, void *data) + loc_path(&local->loc2, NULL); + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LINK, +- client4_0_link_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_link_req); ++ client4_0_link_cbk, NULL, ++ (xdrproc_t)xdr_gfx_link_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3601,8 +3605,8 @@ client4_0_mknod(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKNOD, +- client4_0_mknod_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_mknod_req); ++ client4_0_mknod_cbk, NULL, ++ (xdrproc_t)xdr_gfx_mknod_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3668,8 +3672,8 @@ client4_0_mkdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKDIR, +- client4_0_mkdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_mkdir_req); ++ client4_0_mkdir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_mkdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3726,8 +3730,8 @@ client4_0_create(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_CREATE, +- client4_0_create_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_create_req); ++ client4_0_create_cbk, NULL, ++ (xdrproc_t)xdr_gfx_create_req); + if (ret) { + gf_msg(this->name, 
GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3787,8 +3791,8 @@ client4_0_open(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN, +- client4_0_open_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_open_req); ++ client4_0_open_cbk, NULL, ++ (xdrproc_t)xdr_gfx_open_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3823,10 +3827,12 @@ client4_0_readv(call_frame_t *frame, xlator_t *this, void *data) + }; + struct iobuf *rsp_iobuf = NULL; + struct iobref *rsp_iobref = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -3872,9 +3878,12 @@ client4_0_readv(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.rsp_payload = &rsp_vec; ++ cp.rsp_payload_cnt = 1; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READ, +- client4_0_readv_cbk, NULL, NULL, 0, &rsp_vec, 1, +- local->iobref, (xdrproc_t)xdr_gfx_read_req); ++ client4_0_readv_cbk, &cp, ++ (xdrproc_t)xdr_gfx_read_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3905,10 +3914,12 @@ client4_0_writev(call_frame_t *frame, xlator_t *this, void *data) + }; + int op_errno = ESTALE; + int ret = 0; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -3926,9 +3937,11 @@ client4_0_writev(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.iobref = args->iobref; ++ cp.payload = args->vector; ++ cp.payload_cnt = args->count; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_WRITE, +- client4_0_writev_cbk, args->iobref, +- args->vector, args->count, NULL, 0, NULL, ++ client4_0_writev_cbk, &cp, + (xdrproc_t)xdr_gfx_write_req); + if (ret) { + /* +@@ -3987,8 +4000,8 @@ client4_0_flush(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH, +- client4_0_flush_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_flush_req); ++ client4_0_flush_cbk, NULL, ++ (xdrproc_t)xdr_gfx_flush_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4030,8 +4043,8 @@ client4_0_fsync(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNC, +- client4_0_fsync_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fsync_req); ++ client4_0_fsync_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fsync_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4075,8 +4088,8 @@ client4_0_fstat(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSTAT, +- client4_0_fstat_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fstat_req); ++ client4_0_fstat_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fstat_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4130,8 +4143,8 @@ client4_0_opendir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = 
client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR, +- client4_0_opendir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_opendir_req); ++ client4_0_opendir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_opendir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4175,8 +4188,8 @@ client4_0_fsyncdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNCDIR, +- client4_0_fsyncdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fsyncdir_req); ++ client4_0_fsyncdir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fsyncdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4219,8 +4232,8 @@ client4_0_statfs(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STATFS, +- client4_0_statfs_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_statfs_req); ++ client4_0_statfs_cbk, NULL, ++ (xdrproc_t)xdr_gfx_statfs_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4263,8 +4276,8 @@ client4_0_setxattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETXATTR, +- client4_0_setxattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_setxattr_req); ++ client4_0_setxattr_cbk, NULL, ++ (xdrproc_t)xdr_gfx_setxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4310,8 +4323,7 @@ client4_0_fsetxattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FSETXATTR, client4_0_fsetxattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_fsetxattr_req); ++ NULL, (xdrproc_t)xdr_gfx_fsetxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4364,8 +4376,7 @@ client4_0_fgetxattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FGETXATTR, client4_0_fgetxattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_fgetxattr_req); ++ NULL, (xdrproc_t)xdr_gfx_fgetxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4452,8 +4463,8 @@ client4_0_getxattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_GETXATTR, +- client4_0_getxattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_getxattr_req); ++ client4_0_getxattr_cbk, NULL, ++ (xdrproc_t)xdr_gfx_getxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4514,8 +4525,8 @@ client4_0_xattrop(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_XATTROP, +- client4_0_xattrop_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_xattrop_req); ++ client4_0_xattrop_cbk, NULL, ++ (xdrproc_t)xdr_gfx_xattrop_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4566,8 +4577,8 @@ client4_0_fxattrop(call_frame_t *frame, xlator_t *this, void *data) + 
} + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FXATTROP, +- client4_0_fxattrop_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fxattrop_req); ++ client4_0_fxattrop_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fxattrop_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4613,8 +4624,7 @@ client4_0_removexattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_REMOVEXATTR, client4_0_removexattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_removexattr_req); ++ NULL, (xdrproc_t)xdr_gfx_removexattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4656,10 +4666,9 @@ client4_0_fremovexattr(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } +- ret = client_submit_request(this, &req, frame, conf->fops, +- GFS3_OP_FREMOVEXATTR, +- client4_0_fremovexattr_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfx_fremovexattr_req); ++ ret = client_submit_request( ++ this, &req, frame, conf->fops, GFS3_OP_FREMOVEXATTR, ++ client4_0_fremovexattr_cbk, NULL, (xdrproc_t)xdr_gfx_fremovexattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4701,8 +4710,8 @@ client4_0_lease(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LEASE, +- client4_0_lease_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_lease_req); ++ client4_0_lease_cbk, NULL, ++ (xdrproc_t)xdr_gfx_lease_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4764,7 +4773,7 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK, +- client4_0_lk_cbk, NULL, NULL, 0, NULL, 0, NULL, ++ client4_0_lk_cbk, NULL, + (xdrproc_t)xdr_gfx_lk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4807,8 +4816,8 @@ client4_0_inodelk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_INODELK, +- client4_0_inodelk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_inodelk_req); ++ client4_0_inodelk_cbk, NULL, ++ (xdrproc_t)xdr_gfx_inodelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4857,8 +4866,8 @@ client4_0_finodelk(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FINODELK, +- client4_0_finodelk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_finodelk_req); ++ client4_0_finodelk_cbk, NULL, ++ (xdrproc_t)xdr_gfx_finodelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4902,8 +4911,8 @@ client4_0_entrylk(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ENTRYLK, +- client4_0_entrylk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_entrylk_req); ++ client4_0_entrylk_cbk, NULL, ++ (xdrproc_t)xdr_gfx_entrylk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4946,8 +4955,8 @@ 
client4_0_fentrylk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FENTRYLK, +- client4_0_fentrylk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fentrylk_req); ++ client4_0_fentrylk_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fentrylk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4988,10 +4997,12 @@ client4_0_readdir(call_frame_t *frame, xlator_t *this, void *data) + {0}, + }; + int readdir_rsp_size = 0; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5041,9 +5052,11 @@ client4_0_readdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = rsp_iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIR, +- client4_0_readdir_cbk, NULL, rsphdr, count, +- NULL, 0, rsp_iobref, ++ client4_0_readdir_cbk, &cp, + (xdrproc_t)xdr_gfx_readdir_req); + + if (ret) { +@@ -5089,10 +5102,12 @@ client4_0_readdirp(call_frame_t *frame, xlator_t *this, void *data) + {0}, + }; + clnt_local_t *local = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5142,9 +5157,11 @@ client4_0_readdirp(call_frame_t *frame, xlator_t *this, void *data) + + local->fd = fd_ref(args->fd); + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = rsp_iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIRP, +- client4_0_readdirp_cbk, NULL, rsphdr, count, +- NULL, 0, rsp_iobref, ++ client4_0_readdirp_cbk, &cp, + (xdrproc_t)xdr_gfx_readdirp_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5192,8 +5209,8 @@ client4_0_setattr(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETATTR, +- client4_0_setattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_setattr_req); ++ client4_0_setattr_cbk, NULL, ++ (xdrproc_t)xdr_gfx_setattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5235,8 +5252,7 @@ client4_0_fallocate(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FALLOCATE, client4_0_fallocate_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_fallocate_req); ++ NULL, (xdrproc_t)xdr_gfx_fallocate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5277,8 +5293,8 @@ client4_0_discard(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_DISCARD, +- client4_0_discard_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_discard_req); ++ client4_0_discard_cbk, NULL, ++ (xdrproc_t)xdr_gfx_discard_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5320,8 +5336,8 @@ client4_0_zerofill(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ZEROFILL, +- client4_0_zerofill_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_zerofill_req); ++ client4_0_zerofill_cbk, NULL, 
++ (xdrproc_t)xdr_gfx_zerofill_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5363,7 +5379,7 @@ client4_0_ipc(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_IPC, +- client4_0_ipc_cbk, NULL, NULL, 0, NULL, 0, NULL, ++ client4_0_ipc_cbk, NULL, + (xdrproc_t)xdr_gfx_ipc_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5408,8 +5424,8 @@ client4_0_seek(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SEEK, +- client4_0_seek_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_seek_req); ++ client4_0_seek_cbk, NULL, ++ (xdrproc_t)xdr_gfx_seek_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5458,8 +5474,7 @@ client4_0_getactivelk(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_GETACTIVELK, client4_0_getactivelk_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_getactivelk_req); ++ NULL, (xdrproc_t)xdr_gfx_getactivelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5514,8 +5529,7 @@ client4_0_setactivelk(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_SETACTIVELK, client4_0_setactivelk_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_setactivelk_req); ++ NULL, (xdrproc_t)xdr_gfx_setactivelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5785,8 +5799,8 @@ client4_0_namelink(call_frame_t *frame, xlator_t *this, void *data) + + dict_to_xdr(args->xdata, &req.xdata); + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_NAMELINK, +- client4_namelink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_namelink_req); ++ client4_namelink_cbk, NULL, ++ (xdrproc_t)xdr_gfx_namelink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5837,8 +5851,8 @@ client4_0_icreate(call_frame_t *frame, xlator_t *this, void *data) + op_errno = ESTALE; + dict_to_xdr(args->xdata, &req.xdata); + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ICREATE, +- client4_icreate_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_icreate_req); ++ client4_icreate_cbk, NULL, ++ (xdrproc_t)xdr_gfx_icreate_req); + if (ret) + goto free_reqdata; + GF_FREE(req.xdata.pairs.pairs_val); +@@ -5864,10 +5878,12 @@ client4_0_put(call_frame_t *frame, xlator_t *this, void *data) + int op_errno = ESTALE; + int ret = 0; + clnt_local_t *local = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5890,9 +5906,11 @@ client4_0_put(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.iobref = args->iobref; ++ cp.payload = args->vector; ++ cp.payload_cnt = args->count; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_PUT, +- client4_0_put_cbk, args->iobref, args->vector, +- args->count, NULL, 0, NULL, ++ client4_0_put_cbk, &cp, + (xdrproc_t)xdr_gfx_put_req); + if (ret) { + /* +@@ -5959,10 +5977,10 @@ client4_0_copy_file_range(call_frame_t *frame, xlator_t *this, void 
*data) + local->attempt_reopen_out = client_is_reopen_needed(args->fd_out, this, + req.fd_out); + +- ret = client_submit_request( +- this, &req, frame, conf->fops, GFS3_OP_COPY_FILE_RANGE, +- client4_0_copy_file_range_cbk, NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_copy_file_range_req); ++ ret = client_submit_request(this, &req, frame, conf->fops, ++ GFS3_OP_COPY_FILE_RANGE, ++ client4_0_copy_file_range_cbk, NULL, ++ (xdrproc_t)xdr_gfx_copy_file_range_req); + if (ret) { + /* + * If the lower layers fail to submit a request, they'll also +@@ -6009,8 +6027,8 @@ client4_0_fsetattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSETATTR, +- client4_0_fsetattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fsetattr_req); ++ client4_0_fsetattr_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fsetattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -6054,9 +6072,9 @@ client4_0_rchecksum(call_frame_t *frame, xlator_t *this, void *data) + + dict_to_xdr(args->xdata, &req.xdata); + +- ret = client_submit_request( +- this, &req, frame, conf->fops, GFS3_OP_RCHECKSUM, client4_rchecksum_cbk, +- NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfx_rchecksum_req); ++ ret = client_submit_request(this, &req, frame, conf->fops, ++ GFS3_OP_RCHECKSUM, client4_rchecksum_cbk, NULL, ++ (xdrproc_t)xdr_gfx_rchecksum_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index dea6c28..2d75714 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -131,10 +131,7 @@ client_type_to_gf_type(short l_type) + int + client_submit_request(xlator_t *this, void *req, call_frame_t *frame, + rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn, +- struct iobref *iobref, struct iovec *payload, +- int payloadcnt, struct iovec *rsp_payload, +- int rsp_payload_count, struct iobref *rsp_iobref, +- xdrproc_t xdrproc) ++ client_payload_t *cp, xdrproc_t xdrproc) + { + int ret = -1; + clnt_conf_t *conf = NULL; +@@ -180,8 +177,8 @@ client_submit_request(xlator_t *this, void *req, call_frame_t *frame, + goto out; + } + +- if (iobref != NULL) { +- ret = iobref_merge(new_iobref, iobref); ++ if (cp && cp->iobref != NULL) { ++ ret = iobref_merge(new_iobref, cp->iobref); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, PC_MSG_NO_MEMORY, + "cannot merge " +@@ -224,9 +221,16 @@ client_submit_request(xlator_t *this, void *req, call_frame_t *frame, + } + + /* Send the msg */ +- ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count, payload, +- payloadcnt, new_iobref, frame, payload, payloadcnt, +- rsp_payload, rsp_payload_count, rsp_iobref); ++ if (cp) { ++ ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count, ++ cp->payload, cp->payload_cnt, new_iobref, frame, ++ cp->rsphdr, cp->rsphdr_cnt, cp->rsp_payload, ++ cp->rsp_payload_cnt, cp->rsp_iobref); ++ } else { ++ ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count, ++ NULL, 0, new_iobref, frame, NULL, 0, NULL, 0, ++ NULL); ++ } + + if (ret < 0) { + gf_msg_debug(this->name, 0, "rpc_clnt_submit failed"); +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index 71f84f3..f12fa61 100644 +--- a/xlators/protocol/client/src/client.h ++++ 
b/xlators/protocol/client/src/client.h
+@@ -345,6 +345,17 @@ typedef struct client_args {
+ lock_migration_info_t *locklist;
+ } clnt_args_t;
+
++typedef struct client_payload {
++ struct iobref *iobref;
++ struct iovec *payload;
++ struct iovec *rsphdr;
++ struct iovec *rsp_payload;
++ struct iobref *rsp_iobref;
++ int payload_cnt;
++ int rsphdr_cnt;
++ int rsp_payload_cnt;
++} client_payload_t;
++
+ typedef ssize_t (*gfs_serialize_t)(struct iovec outmsg, void *args);
+
+ clnt_fd_ctx_t *
+@@ -359,10 +370,7 @@ client_local_wipe(clnt_local_t *local);
+ int
+ client_submit_request(xlator_t *this, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbk,
+- struct iobref *iobref, struct iovec *rsphdr,
+- int rsphdr_count, struct iovec *rsp_payload,
+- int rsp_count, struct iobref *rsp_iobref,
+- xdrproc_t xdrproc);
++ client_payload_t *cp, xdrproc_t xdrproc);
+
+ int
+ client_submit_compound_request(xlator_t *this, void *req, call_frame_t *frame,
+--
+1.8.3.1
+
diff --git a/SOURCES/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch b/SOURCES/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch
new file mode 100644
index 0000000..9f5f3bf
--- /dev/null
+++ b/SOURCES/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch
@@ -0,0 +1,115 @@
+From 2449a1824c6f7b57889335caaeb09f4c5cb3efce Mon Sep 17 00:00:00 2001
+From: Soumya Koduri
+Date: Thu, 28 Mar 2019 14:59:00 +0530
+Subject: [PATCH 48/52] gfapi: Unblock epoll thread for upcall processing
+
+With commit#ad35193, we made changes to offload the
+processing of upcall notifications to synctask so as not
+to block epoll threads. However, it seems the issue wasn't
+fully addressed.
+
+In "glfs_cbk_upcall_data" -> "synctask_new1", after creating the
+synctask, if no callback is defined, the thread waits on
+synctask_join till the syncfn is finished. So even with those
+changes, epoll threads are blocked till the upcalls are processed.
+
+Hence the right fix is to define a callback function for that
+synctask, "glfs_cbk_upcall_syncop", so as to unblock epoll/notify
+threads completely; the upcall processing can then happen in
+parallel in synctask threads.
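+
+For illustration, the blocking and non-blocking synctask patterns
+differ only in the callback argument (a sketch, not part of this
+change; "env", "syncfn", "cbk" and "args" stand in for the real
+arguments):
+
+    /* no cbk: the caller blocks in synctask_join() until the
+     * syncfn returns, tying up the epoll/notify thread */
+    ret = synctask_new(env, syncfn, NULL, NULL, args);
+
+    /* with cbk: synctask_new() returns immediately and cbk runs
+     * once the syncfn completes, leaving the epoll thread free */
+    ret = synctask_new(env, syncfn, cbk, NULL, args);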
+ +Upstream references- +mainline : https://review.gluster.org/22436 +release-6.0 : https://review.gluster.org/22459 + +Change-Id: I4d8645e3588fab2c3ca534e0112773aaab68a5dd +fixes: bz#1694565 +Signed-off-by: Soumya Koduri +Reviewed-on: https://code.engineering.redhat.com/gerrit/166586 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-fops.c | 42 ++++++++++++++++++++++++++++++++++-------- + 1 file changed, 34 insertions(+), 8 deletions(-) + +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index 88cd32b..01ba60b 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -5714,6 +5714,16 @@ out: + } + + static int ++glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque) ++{ ++ struct upcall_syncop_args *args = opaque; ++ ++ GF_FREE(args->upcall_data); ++ GF_FREE(args); ++ return 0; ++} ++ ++static int + glfs_cbk_upcall_syncop(void *opaque) + { + struct upcall_syncop_args *args = opaque; +@@ -5770,15 +5780,13 @@ out: + GLFS_FREE(up_arg); + } + +- return ret; ++ return 0; + } + + static void + glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data) + { +- struct upcall_syncop_args args = { +- 0, +- }; ++ struct upcall_syncop_args *args = NULL; + int ret = -1; + + if (!fs || !upcall_data) +@@ -5789,16 +5797,34 @@ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data) + goto out; + } + +- args.fs = fs; +- args.upcall_data = upcall_data; ++ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args), ++ glfs_mt_upcall_entry_t); ++ if (!args) { ++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, ++ "Upcall syncop args allocation failed."); ++ goto out; ++ } ++ ++ /* Note: we are not taking any ref on fs here. ++ * Ideally applications have to unregister for upcall events ++ * or stop polling for upcall events before performing ++ * glfs_fini. And as for outstanding synctasks created, we wait ++ * for all syncenv threads to finish tasks before cleaning up the ++ * fs->ctx. Hence it seems safe to process these callback ++ * notification without taking any lock/ref. ++ */ ++ args->fs = fs; ++ args->upcall_data = gf_memdup(upcall_data, sizeof(*upcall_data)); + +- ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop, NULL, NULL, +- &args); ++ ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop, ++ glfs_upcall_syncop_cbk, NULL, args); + /* should we retry incase of failure? 
*/ + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED, + "Synctak for Upcall event_type(%d) and gfid(%s) failed", + upcall_data->event_type, (char *)(upcall_data->gfid)); ++ GF_FREE(args->upcall_data); ++ GF_FREE(args); + } + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0049-transport-socket-log-shutdown-msg-occasionally.patch b/SOURCES/0049-transport-socket-log-shutdown-msg-occasionally.patch new file mode 100644 index 0000000..7cee050 --- /dev/null +++ b/SOURCES/0049-transport-socket-log-shutdown-msg-occasionally.patch @@ -0,0 +1,49 @@ +From e205516ef874d617e4756856098bf10e17b0ea3d Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Fri, 22 Mar 2019 10:40:45 +0530 +Subject: [PATCH 49/52] transport/socket: log shutdown msg occasionally + +>Change-Id: If3fc0884e7e2f45de2d278b98693b7a473220a5f +>Signed-off-by: Raghavendra G +>Fixes: bz#1691616 + +BUG: 1691620 +Change-Id: If3fc0884e7e2f45de2d278b98693b7a473220a5f +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/167088 +Tested-by: RHGS Build Bot +--- + rpc/rpc-transport/socket/src/socket.c | 4 ++-- + rpc/rpc-transport/socket/src/socket.h | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 121d46b..f6de1d3 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -807,8 +807,8 @@ __socket_shutdown(rpc_transport_t *this) + gf_log(this->name, GF_LOG_DEBUG, "shutdown() returned %d. %s", ret, + strerror(errno)); + } else { +- gf_log(this->name, GF_LOG_INFO, "intentional socket shutdown(%d)", +- priv->sock); ++ GF_LOG_OCCASIONALLY(priv->shutdown_log_ctr, this->name, GF_LOG_INFO, ++ "intentional socket shutdown(%d)", priv->sock); + } + + return ret; +diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h +index 32339d3..e1ccae2 100644 +--- a/rpc/rpc-transport/socket/src/socket.h ++++ b/rpc/rpc-transport/socket/src/socket.h +@@ -219,6 +219,7 @@ typedef struct { + int keepalivecnt; + int timeout; + int log_ctr; ++ int shutdown_log_ctr; + /* ssl_error_required is used only during the SSL connection setup + * phase. 
+ * It holds the error code returned by SSL_get_error() and is used to +-- +1.8.3.1 + diff --git a/SOURCES/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch b/SOURCES/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch new file mode 100644 index 0000000..f7c0f65 --- /dev/null +++ b/SOURCES/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch @@ -0,0 +1,142 @@ +From 161a039f8088bf8ce7000d8175e3793219525179 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Thu, 28 Mar 2019 07:17:16 -0400 +Subject: [PATCH 50/52] geo-rep: Fix syncing multiple rename of symlink + +Problem: +Geo-rep fails to sync rename of symlink if it's +renamed multiple times if creation and rename +happened successively + +Worker crash at slave: +Traceback (most recent call last): + File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", in worker + res = getattr(self.obj, rmeth)(*in_data[2:]) + File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", in entry_ops + [ESTALE, EINVAL, EBUSY]) + File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", in errno_wrap + return call(*arg) + File "/usr/libexec/glusterfs/python/syncdaemon/libcxattr.py", in lsetxattr + cls.raise_oserr() + File "/usr/libexec/glusterfs/python/syncdaemon/libcxattr.py", in raise_oserr + raise OSError(errn, os.strerror(errn)) +OSError: [Errno 12] Cannot allocate memory + +Geo-rep Behaviour: +1. SYMLINK doesn't record target path in changelog. + So while syncing SYMLINK, readlink is done on + master to get target path. + +2. Geo-rep will create destination if source is not + present while syncing RENAME. Hence while syncing + RENAME of SYMLINK, target path is collected from + destination. + +Cause: +If symlink is created and renamed multiple times, creation of +symlink is ignored, as it's no longer present on master at +that path. While symlink is renamed multiple times at master, +when syncing first RENAME of SYMLINK, both source and destination +is not present, hence target path is not known. In this case, +while creating destination directly at slave, regular file +attributes were encoded into blob instead of symlink, +causing failure in gfid-access translator while decoding +blob. + +Solution: +While syncing of RENAME of SYMLINK, when target is not known +and when src and destination is not present on the master, +don't create destination. Ignore the rename. It's ok to ignore. +If it's unliked, it's fine. If it's renamed to something else, +it will be synced then. + +Backport of: +> Patch: https://review.gluster.org/22438 +> Change-Id: Ibdfa495513b7c05b5370ab0b89c69a6802338d87 +> BUG: 1693648 +> Signed-off-by: Kotresh HR + +Change-Id: Ibdfa495513b7c05b5370ab0b89c69a6802338d87 +fixes: bz#1670429 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/167122 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/resource.py | 23 ++++++++++++++--------- + tests/00-geo-rep/georep-basic-dr-rsync.t | 1 + + tests/geo-rep.rc | 12 ++++++++++++ + 3 files changed, 27 insertions(+), 9 deletions(-) + +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index a2d0b16..c290d86 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -625,15 +625,20 @@ class Server(object): + # exist with different gfid. 
+ if not matching_disk_gfid(gfid, entry): + if e['stat'] and not stat.S_ISDIR(e['stat']['mode']): +- if stat.S_ISLNK(e['stat']['mode']) and \ +- e['link'] is not None: +- st1 = lstat(en) +- if isinstance(st1, int): +- (pg, bname) = entry2pb(en) +- blob = entry_pack_symlink(cls, gfid, bname, +- e['link'], e['stat']) +- elif not matching_disk_gfid(gfid, en): +- collect_failure(e, EEXIST, uid, gid, True) ++ if stat.S_ISLNK(e['stat']['mode']): ++ # src is not present, so don't sync symlink as ++ # we don't know target. It's ok to ignore. If ++ # it's unliked, it's fine. If it's renamed to ++ # something else, it will be synced then. ++ if e['link'] is not None: ++ st1 = lstat(en) ++ if isinstance(st1, int): ++ (pg, bname) = entry2pb(en) ++ blob = entry_pack_symlink(cls, gfid, bname, ++ e['link'], ++ e['stat']) ++ elif not matching_disk_gfid(gfid, en): ++ collect_failure(e, EEXIST, uid, gid, True) + else: + slink = os.path.join(pfx, gfid) + st = lstat(slink) +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index 4a03930..8b64370 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -110,6 +110,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1 + #Check History Crawl. + TEST $GEOREP_CLI $master $slave stop + TEST create_data "history" ++TEST create_rename_symlink_case + TEST $GEOREP_CLI $master $slave start + EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active" + EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive" +diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc +index 396b4c4..d723129 100644 +--- a/tests/geo-rep.rc ++++ b/tests/geo-rep.rc +@@ -19,6 +19,18 @@ function check_common_secret_file() + echo $? + } + ++function create_rename_symlink_case() ++{ ++ mkdir ${mastermnt}/MUL_REN_SYMLINK ++ cd ${mastermnt}/MUL_REN_SYMLINK ++ mkdir sym_dir1 ++ ln -s "sym_dir1" sym1 ++ mv sym1 sym2 ++ mv sym2 sym3 ++ mv sym3 sym4 ++ cd - ++} ++ + function create_data() + { + prefix=$1 +-- +1.8.3.1 + diff --git a/SOURCES/0051-spec-update-rpm-install-condition.patch b/SOURCES/0051-spec-update-rpm-install-condition.patch new file mode 100644 index 0000000..8d5ce47 --- /dev/null +++ b/SOURCES/0051-spec-update-rpm-install-condition.patch @@ -0,0 +1,67 @@ +From 71f4d55770287288f39b31a0435916ac3d9f742b Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya +Date: Fri, 5 Apr 2019 22:27:52 +0530 +Subject: [PATCH 51/52] spec: update rpm install condition + +Update code to allow rpm install without gluster process shutdown. + +Label: DOWNSTREAM ONLY + +BUG: 1493284 +Change-Id: I308e7e4629a2428927a6df34536e3cd645a54f8c +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/167089 +Tested-by: RHGS Build Bot +Reviewed-by: Milind Changire +--- + glusterfs.spec.in | 34 ---------------------------------- + 1 file changed, 34 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 7c7f7c0..0d57b49 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1629,40 +1629,6 @@ if not (ok == 0) then + end + + +-%pretrans devel -p +-if not posix.access("/bin/bash", "x") then +- -- initial installation, no shell, no running glusterfsd +- return 0 +-end +- +--- TODO: move this completely to a lua script +--- For now, we write a temporary bash script and execute that. +- +-script = [[#!/bin/sh +-pidof -c -o %PPID -x glusterfsd &>/dev/null +- +-if [ $? -eq 0 ]; then +- pushd . 
> /dev/null 2>&1 +- for volume in /var/lib/glusterd/vols/*; do cd $volume; +- vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` +- volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` +- if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then +- exit 1; +- fi +- done +- +- popd > /dev/null 2>&1 +- exit 1; +-fi +-]] +- +-ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) +-end +- +- +- + %pretrans fuse -p + if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd +-- +1.8.3.1 + diff --git a/SOURCES/0052-geo-rep-IPv6-support.patch b/SOURCES/0052-geo-rep-IPv6-support.patch new file mode 100644 index 0000000..12c6e1b --- /dev/null +++ b/SOURCES/0052-geo-rep-IPv6-support.patch @@ -0,0 +1,299 @@ +From d7bb933742f4d9135621590bf13713633c549af1 Mon Sep 17 00:00:00 2001 +From: Aravinda VK +Date: Thu, 14 Mar 2019 20:06:54 +0530 +Subject: [PATCH 52/52] geo-rep: IPv6 support + +`address_family=inet6` needs to be added while mounting master and +slave volumes in gverify script. + +New option introduced to gluster cli(`--inet6`) which will be used +internally by geo-rep while calling `gluster volume info +--remote-host=`. + +Backport of https://review.gluster.org/22363 + +Fixes: bz#1688231 +Change-Id: I1e0d42cae07158df043e64a2f991882d8c897837 +Signed-off-by: Aravinda VK +Reviewed-on: https://code.engineering.redhat.com/gerrit/167120 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli.c | 11 ++++++++++ + cli/src/cli.h | 1 + + geo-replication/src/gverify.sh | 22 ++++++++++++++++---- + geo-replication/syncdaemon/argsupgrade.py | 13 +++++++++++- + geo-replication/syncdaemon/gsyncd.py | 1 + + geo-replication/syncdaemon/subcmds.py | 9 +++++++-- + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 30 ++++++++++++++++++++++++++-- + 7 files changed, 78 insertions(+), 9 deletions(-) + +diff --git a/cli/src/cli.c b/cli/src/cli.c +index 08f117e..c33d152 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -433,6 +433,12 @@ cli_opt_parse(char *opt, struct cli_state *state) + return 0; + } + ++ oarg = strtail(opt, "inet6"); ++ if (oarg) { ++ state->address_family = "inet6"; ++ return 0; ++ } ++ + oarg = strtail(opt, "log-file="); + if (oarg) { + state->log_file = oarg; +@@ -679,6 +685,11 @@ cli_rpc_init(struct cli_state *state) + this = THIS; + cli_rpc_prog = &cli_prog; + ++ /* If address family specified in CLI */ ++ if (state->address_family) { ++ addr_family = state->address_family; ++ } ++ + /* Connect to glusterd using the specified method, giving preference + * to a unix socket connection. If nothing is specified, connect to + * the default glusterd socket. 
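+
+For illustration (a sketch, not part of this change; the host and
+volume names below are placeholders), the address family then reaches
+the gverify.sh mounts and the internal CLI query like so:
+
+    # gverify.sh mount of the master volume when inet6 is in use
+    glusterfs -s localhost \
+        --xlator-option="transport.address-family=inet6" \
+        --volfile-id $MASTERVOL -l $master_log_file $d
+
+    # volume-info query issued internally by gsyncd
+    gluster --xml --remote-host=slavehost volume info slavevol --inet6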
+diff --git a/cli/src/cli.h b/cli/src/cli.h +index 5df86d5..b79a0a2 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -136,6 +136,7 @@ struct cli_state { + gf_loglevel_t log_level; + + char *glusterd_sock; ++ char *address_family; + }; + + struct cli_local { +diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh +index d048de0..7c88f9f 100755 +--- a/geo-replication/src/gverify.sh ++++ b/geo-replication/src/gverify.sh +@@ -94,6 +94,7 @@ echo $cmd_line; + function master_stats() + { + MASTERVOL=$1; ++ local inet6=$2; + local d; + local i; + local disk_size; +@@ -102,7 +103,12 @@ function master_stats() + local m_status; + + d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null); +- glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d; ++ if [ "$inet6" = "inet6" ]; then ++ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-id $MASTERVOL -l $master_log_file $d; ++ else ++ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d; ++ fi ++ + i=$(get_inode_num $d); + if [[ "$i" -ne "1" ]]; then + echo 0:0; +@@ -124,12 +130,18 @@ function slave_stats() + SLAVEUSER=$1; + SLAVEHOST=$2; + SLAVEVOL=$3; ++ local inet6=$4; + local cmd_line; + local ver; + local status; + + d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null); +- glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d; ++ if [ "$inet6" = "inet6" ]; then ++ glusterfs --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d; ++ else ++ glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d; ++ fi ++ + i=$(get_inode_num $d); + if [[ "$i" -ne "1" ]]; then + echo 0:0; +@@ -167,6 +179,8 @@ function main() + log_file=$6 + > $log_file + ++ inet6=$7 ++ + # Use FORCE_BLOCKER flag in the error message to differentiate + # between the errors which the force command should bypass + +@@ -204,8 +218,8 @@ function main() + fi; + + ERRORS=0; +- master_data=$(master_stats $1); +- slave_data=$(slave_stats $2 $3 $4); ++ master_data=$(master_stats $1 ${inet6}); ++ slave_data=$(slave_stats $2 $3 $4 ${inet6}); + master_disk_size=$(echo $master_data | cut -f1 -d':'); + slave_disk_size=$(echo $slave_data | cut -f1 -d':'); + master_used_size=$(echo $master_data | cut -f2 -d':'); +diff --git a/geo-replication/syncdaemon/argsupgrade.py b/geo-replication/syncdaemon/argsupgrade.py +index 4018143..7af4063 100644 +--- a/geo-replication/syncdaemon/argsupgrade.py ++++ b/geo-replication/syncdaemon/argsupgrade.py +@@ -84,6 +84,10 @@ def upgrade(): + # fail when it does stat to check the existence. 
+ init_gsyncd_template_conf() + ++ inet6 = False ++ if "--inet6" in sys.argv: ++ inet6 = True ++ + if "--monitor" in sys.argv: + # python gsyncd.py --path=/bricks/b1 + # --monitor -c gsyncd.conf +@@ -147,8 +151,11 @@ def upgrade(): + + user, hname = remote_addr.split("@") + ++ if not inet6: ++ hname = gethostbyname(hname) ++ + print(("ssh://%s@%s:gluster://127.0.0.1:%s" % ( +- user, gethostbyname(hname), vol))) ++ user, hname, vol))) + + sys.exit(0) + elif "--normalize-url" in sys.argv: +@@ -346,3 +353,7 @@ def upgrade(): + + if pargs.reset_sync_time: + sys.argv.append("--reset-sync-time") ++ ++ if inet6: ++ # Add `--inet6` as first argument ++ sys.argv = [sys.argv[0], "--inet6"] + sys.argv[1:] +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 037f351..effe0ce 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -47,6 +47,7 @@ def main(): + sys.exit(0) + + parser = ArgumentParser() ++ parser.add_argument("--inet6", action="store_true") + sp = parser.add_subparsers(dest="subcmd") + + # Monitor Status File update +diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py +index 30050ec..4ece7e0 100644 +--- a/geo-replication/syncdaemon/subcmds.py ++++ b/geo-replication/syncdaemon/subcmds.py +@@ -110,8 +110,13 @@ def subcmd_voluuidget(args): + + ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError + +- po = Popen(['gluster', '--xml', '--remote-host=' + args.host, +- 'volume', 'info', args.volname], bufsize=0, ++ cmd = ['gluster', '--xml', '--remote-host=' + args.host, ++ 'volume', 'info', args.volname] ++ ++ if args.inet6: ++ cmd.append("--inet6") ++ ++ po = Popen(cmd, bufsize=0, + stdin=None, stdout=PIPE, stderr=PIPE, + universal_newlines=True) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +index 49baa58..0f40bea 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c ++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +@@ -76,6 +76,19 @@ static char *gsync_reserved_opts[] = {"gluster-command", + static char *gsync_no_restart_opts[] = {"checkpoint", "log_rsync_performance", + "log-rsync-performance", NULL}; + ++void ++set_gsyncd_inet6_arg(runner_t *runner) ++{ ++ xlator_t *this = NULL; ++ char *af; ++ int ret; ++ ++ this = THIS; ++ ret = dict_get_str(this->options, "transport.address-family", &af); ++ if (ret == 0) ++ runner_argprintf(runner, "--%s", af); ++} ++ + int + __glusterd_handle_sys_exec(rpcsvc_request_t *req) + { +@@ -384,6 +397,7 @@ glusterd_urltransform_init(runner_t *runner, const char *transname) + { + runinit(runner); + runner_add_arg(runner, GSYNCD_PREFIX "/gsyncd"); ++ set_gsyncd_inet6_arg(runner); + runner_argprintf(runner, "--%s-url", transname); + } + +@@ -725,6 +739,7 @@ glusterd_get_slave_voluuid(char *slave_host, char *slave_vol, char *vol_uuid) + + runinit(&runner); + runner_add_arg(&runner, GSYNCD_PREFIX "/gsyncd"); ++ set_gsyncd_inet6_arg(&runner); + runner_add_arg(&runner, "--slavevoluuid-get"); + runner_argprintf(&runner, "%s::%s", slave_host, slave_vol); + +@@ -788,6 +803,7 @@ glusterd_gsync_get_config(char *master, char *slave, char *conf_path, + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); ++ set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, 
"--config-get-all", NULL); +@@ -917,6 +933,7 @@ glusterd_gsync_get_status(char *master, char *slave, char *conf_path, + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); ++ set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, "--status-get", NULL); +@@ -937,6 +954,7 @@ glusterd_gsync_get_param_file(char *prmfile, const char *param, char *master, + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); ++ set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, "--config-get", NULL); +@@ -2811,6 +2829,7 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol, + char *slave_ip = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; ++ char *af = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -2852,9 +2871,16 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol, + runner_argprintf(&runner, "%s", slave_vol); + runner_argprintf(&runner, "%d", ssh_port); + runner_argprintf(&runner, "%s", log_file_path); +- gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s", ++ ret = dict_get_str(this->options, "transport.address-family", &af); ++ if (ret) ++ af = "-"; ++ ++ runner_argprintf(&runner, "%s", af); ++ ++ gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s %s", + runner.argv[0], runner.argv[1], runner.argv[2], runner.argv[3], +- runner.argv[4], runner.argv[5], runner.argv[6]); ++ runner.argv[4], runner.argv[5], runner.argv[6], ++ runner.argv[7]); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); +-- +1.8.3.1 + diff --git a/SOURCES/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch b/SOURCES/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch new file mode 100644 index 0000000..337370d --- /dev/null +++ b/SOURCES/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch @@ -0,0 +1,575 @@ +From 1fb89973551937f34f24b45e07072a6ce6c30ff9 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Mon, 16 Oct 2017 14:18:31 +0530 +Subject: [PATCH 053/124] Revert "packaging: (ganesha) remove glusterfs-ganesha + subpackage and related files)" + +This reverts commit 0cf2963f12a8b540a7042605d8c79f638fdf6cee. 
+ +Label: DOWNSTREAM ONLY + +Change-Id: Id6e7585021bd4dd78a59580cfa4838bdd4e539a0 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167102 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 3 + + extras/Makefile.am | 3 +- + extras/ganesha/Makefile.am | 2 + + extras/ganesha/config/Makefile.am | 4 + + extras/ganesha/config/ganesha-ha.conf.sample | 19 ++++ + extras/ganesha/scripts/Makefile.am | 4 + + extras/ganesha/scripts/create-export-ganesha.sh | 91 +++++++++++++++ + extras/ganesha/scripts/dbus-send.sh | 60 ++++++++++ + extras/ganesha/scripts/generate-epoch.py | 48 ++++++++ + extras/hook-scripts/start/post/Makefile.am | 2 +- + extras/hook-scripts/start/post/S31ganesha-start.sh | 122 +++++++++++++++++++++ + glusterfs.spec.in | 44 +++++++- + 12 files changed, 396 insertions(+), 6 deletions(-) + create mode 100644 extras/ganesha/Makefile.am + create mode 100644 extras/ganesha/config/Makefile.am + create mode 100644 extras/ganesha/config/ganesha-ha.conf.sample + create mode 100644 extras/ganesha/scripts/Makefile.am + create mode 100755 extras/ganesha/scripts/create-export-ganesha.sh + create mode 100755 extras/ganesha/scripts/dbus-send.sh + create mode 100755 extras/ganesha/scripts/generate-epoch.py + create mode 100755 extras/hook-scripts/start/post/S31ganesha-start.sh + +diff --git a/configure.ac b/configure.ac +index 0d06f5a..125ae29 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -196,6 +196,9 @@ AC_CONFIG_FILES([Makefile + extras/init.d/glustereventsd-Debian + extras/init.d/glustereventsd-Redhat + extras/init.d/glustereventsd-FreeBSD ++ extras/ganesha/Makefile ++ extras/ganesha/config/Makefile ++ extras/ganesha/scripts/Makefile + extras/systemd/Makefile + extras/systemd/glusterd.service + extras/systemd/glustereventsd.service +diff --git a/extras/Makefile.am b/extras/Makefile.am +index ff5ca9b..983f014 100644 +--- a/extras/Makefile.am ++++ b/extras/Makefile.am +@@ -11,7 +11,8 @@ EditorModedir = $(docdir) + EditorMode_DATA = glusterfs-mode.el glusterfs.vim + + SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \ +- $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python ++ $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \ ++ ganesha + + confdir = $(sysconfdir)/glusterfs + if WITH_SERVER +diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am +new file mode 100644 +index 0000000..542de68 +--- /dev/null ++++ b/extras/ganesha/Makefile.am +@@ -0,0 +1,2 @@ ++SUBDIRS = scripts config ++CLEANFILES = +diff --git a/extras/ganesha/config/Makefile.am b/extras/ganesha/config/Makefile.am +new file mode 100644 +index 0000000..c729273 +--- /dev/null ++++ b/extras/ganesha/config/Makefile.am +@@ -0,0 +1,4 @@ ++EXTRA_DIST= ganesha-ha.conf.sample ++ ++confdir = $(sysconfdir)/ganesha ++conf_DATA = ganesha-ha.conf.sample +diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample +new file mode 100644 +index 0000000..c22892b +--- /dev/null ++++ b/extras/ganesha/config/ganesha-ha.conf.sample +@@ -0,0 +1,19 @@ ++# Name of the HA cluster created. ++# must be unique within the subnet ++HA_NAME="ganesha-ha-360" ++# ++# N.B. you may use short names or long names; you may not use IP addrs. ++# Once you select one, stay with it as it will be mildly unpleasant to ++# clean up if you switch later on. 
Ensure that all names - short and/or ++# long - are in DNS or /etc/hosts on all machines in the cluster. ++# ++# The subset of nodes of the Gluster Trusted Pool that form the ganesha ++# HA cluster. Hostname is specified. ++HA_CLUSTER_NODES="server1,server2,..." ++#HA_CLUSTER_NODES="server1.lab.redhat.com,server2.lab.redhat.com,..." ++# ++# Virtual IPs for each of the nodes specified above. ++VIP_server1="10.0.2.1" ++VIP_server2="10.0.2.2" ++#VIP_server1_lab_redhat_com="10.0.2.1" ++#VIP_server2_lab_redhat_com="10.0.2.2" +diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am +new file mode 100644 +index 0000000..00a2c45 +--- /dev/null ++++ b/extras/ganesha/scripts/Makefile.am +@@ -0,0 +1,4 @@ ++EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh ++ ++scriptsdir = $(libexecdir)/ganesha ++scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py +diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh +new file mode 100755 +index 0000000..1ffba42 +--- /dev/null ++++ b/extras/ganesha/scripts/create-export-ganesha.sh +@@ -0,0 +1,91 @@ ++#!/bin/bash ++ ++#This script is called by glusterd when the user ++#tries to export a volume via NFS-Ganesha. ++#An export file specific to a volume ++#is created in GANESHA_DIR/exports. ++ ++# Try loading the config from any of the distro ++# specific configuration locations ++if [ -f /etc/sysconfig/ganesha ] ++ then ++ . /etc/sysconfig/ganesha ++fi ++if [ -f /etc/conf.d/ganesha ] ++ then ++ . /etc/conf.d/ganesha ++fi ++if [ -f /etc/default/ganesha ] ++ then ++ . /etc/default/ganesha ++fi ++ ++GANESHA_DIR=${1%/} ++OPTION=$2 ++VOL=$3 ++CONF=$GANESHA_DIR"/ganesha.conf" ++declare -i EXPORT_ID ++ ++function check_cmd_status() ++{ ++ if [ "$1" != "0" ] ++ then ++ rm -rf $GANESHA_DIR/exports/export.$VOL.conf ++ sed -i /$VOL.conf/d $CONF ++ exit 1 ++ fi ++} ++ ++ ++if [ ! -d "$GANESHA_DIR/exports" ]; ++ then ++ mkdir $GANESHA_DIR/exports ++ check_cmd_status `echo $?` ++fi ++ ++function write_conf() ++{ ++echo -e "# WARNING : Using Gluster CLI will overwrite manual ++# changes made to this file. To avoid it, edit the ++# file and run ganesha-ha.sh --refresh-config." ++ ++echo "EXPORT{" ++echo " Export_Id = 2;" ++echo " Path = \"/$VOL\";" ++echo " FSAL {" ++echo " name = "GLUSTER";" ++echo " hostname=\"localhost\";" ++echo " volume=\"$VOL\";" ++echo " }" ++echo " Access_type = RW;" ++echo " Disable_ACL = true;" ++echo ' Squash="No_root_squash";' ++echo " Pseudo=\"/$VOL\";" ++echo ' Protocols = "3", "4" ;' ++echo ' Transports = "UDP","TCP";' ++echo ' SecType = "sys";' ++echo " }" ++} ++if [ "$OPTION" = "on" ]; ++then ++ if ! 
(cat $CONF | grep $VOL.conf\"$ ) ++ then ++ write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf ++ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF ++ count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l` ++ if [ "$count" = "1" ] ; then ++ EXPORT_ID=2 ++ else ++ EXPORT_ID=`cat $GANESHA_DIR/.export_added` ++ check_cmd_status `echo $?` ++ EXPORT_ID=EXPORT_ID+1 ++ sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \ ++ $GANESHA_DIR/exports/export.$VOL.conf ++ check_cmd_status `echo $?` ++ fi ++ echo $EXPORT_ID > $GANESHA_DIR/.export_added ++ fi ++else ++ rm -rf $GANESHA_DIR/exports/export.$VOL.conf ++ sed -i /$VOL.conf/d $CONF ++fi +diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh +new file mode 100755 +index 0000000..ec8d948 +--- /dev/null ++++ b/extras/ganesha/scripts/dbus-send.sh +@@ -0,0 +1,60 @@ ++#!/bin/bash ++ ++# Try loading the config from any of the distro ++# specific configuration locations ++if [ -f /etc/sysconfig/ganesha ] ++ then ++ . /etc/sysconfig/ganesha ++fi ++if [ -f /etc/conf.d/ganesha ] ++ then ++ . /etc/conf.d/ganesha ++fi ++if [ -f /etc/default/ganesha ] ++ then ++ . /etc/default/ganesha ++fi ++ ++GANESHA_DIR=${1%/} ++OPTION=$2 ++VOL=$3 ++CONF=$GANESHA_DIR"/ganesha.conf" ++ ++function check_cmd_status() ++{ ++ if [ "$1" != "0" ] ++ then ++ logger "dynamic export failed on node :${hostname -s}" ++ fi ++} ++ ++#This function keeps track of export IDs and increments it with every new entry ++function dynamic_export_add() ++{ ++ dbus-send --system \ ++--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ ++org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \ ++string:"EXPORT(Path=/$VOL)" ++ check_cmd_status `echo $?` ++} ++ ++#This function removes an export dynamically(uses the export_id of the export) ++function dynamic_export_remove() ++{ ++ removed_id=`cat $GANESHA_DIR/exports/export.$VOL.conf |\ ++grep Export_Id | awk -F"[=,;]" '{print$2}'| tr -d '[[:space:]]'` ++ dbus-send --print-reply --system \ ++--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ ++org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id ++ check_cmd_status `echo $?` ++} ++ ++if [ "$OPTION" = "on" ]; ++then ++ dynamic_export_add $@ ++fi ++ ++if [ "$OPTION" = "off" ]; ++then ++ dynamic_export_remove $@ ++fi +diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py +new file mode 100755 +index 0000000..5db5e56 +--- /dev/null ++++ b/extras/ganesha/scripts/generate-epoch.py +@@ -0,0 +1,48 @@ ++#!/usr/bin/python ++# ++# Copyright (c) 2016 Red Hat, Inc. ++# This file is part of GlusterFS. ++# ++# This file is licensed to you under your choice of the GNU Lesser ++# General Public License, version 3 or any later version (LGPLv3 or ++# later), or the GNU General Public License, version 2 (GPLv2), in all ++# cases as published by the Free Software Foundation. ++# ++# Generates unique epoch value on each gluster node to be used by ++# nfs-ganesha service on that node. ++# ++# Configure 'EPOCH_EXEC' option to this script path in ++# '/etc/sysconfig/ganesha' file used by nfs-ganesha service. 
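dbus-send.sh above drives NFS-Ganesha's export manager over D-Bus; adding and removing an export differ only in the method invoked and its arguments. A condensed Python sketch of the same two calls through the dbus-send CLI (the wrapper function and its parameters are illustrative; the destination, object path, and method names are the ones the script uses):

    import subprocess

    def dynamic_export(ganesha_dir, volume, enable):
        # Export config file laid out by create-export-ganesha.sh.
        conf = "%s/exports/export.%s.conf" % (ganesha_dir, volume)
        base = ["dbus-send", "--print-reply", "--system",
                "--dest=org.ganesha.nfsd", "/org/ganesha/nfsd/ExportMgr"]
        if enable:
            cmd = base + ["org.ganesha.nfsd.exportmgr.AddExport",
                          "string:" + conf,
                          "string:EXPORT(Path=/%s)" % volume]
        else:
            # Assumes the conf carries an "Export_Id = N;" line, which
            # the script extracts with grep/awk.
            export_id = None
            with open(conf) as f:
                for line in f:
                    if "Export_Id" in line:
                        export_id = line.split("=")[1].split(";")[0].strip()
                        break
            cmd = base + ["org.ganesha.nfsd.exportmgr.RemoveExport",
                          "uint16:%s" % export_id]
        subprocess.check_call(cmd)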
++# ++# Construct epoch as follows - ++# first 32-bit contains the now() time ++# rest 32-bit value contains the local glusterd node uuid ++ ++import time ++import binascii ++ ++# Calculate the now() time into a 64-bit integer value ++def epoch_now(): ++ epoch_time = int(time.mktime(time.localtime())) << 32 ++ return epoch_time ++ ++# Read glusterd UUID and extract first 32-bit of it ++def epoch_uuid(): ++ file_name = '/var/lib/glusterd/glusterd.info' ++ ++ for line in open(file_name): ++ if "UUID" in line: ++ glusterd_uuid = line.split('=')[1].strip() ++ ++ uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-","")) ++ ++ epoch_uuid = int(uuid_bin.encode('hex'), 32) & 0xFFFF0000 ++ return epoch_uuid ++ ++# Construct epoch as follows - ++# first 32-bit contains the now() time ++# rest 32-bit value contains the local glusterd node uuid ++epoch = (epoch_now() | epoch_uuid()) ++print str(epoch) ++ ++exit(0) +diff --git a/extras/hook-scripts/start/post/Makefile.am b/extras/hook-scripts/start/post/Makefile.am +index e32546d..792019d 100644 +--- a/extras/hook-scripts/start/post/Makefile.am ++++ b/extras/hook-scripts/start/post/Makefile.am +@@ -1,4 +1,4 @@ +-EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh ++EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S31ganesha-start.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/start/post/ + if WITH_SERVER +diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh +new file mode 100755 +index 0000000..90ba6bc +--- /dev/null ++++ b/extras/hook-scripts/start/post/S31ganesha-start.sh +@@ -0,0 +1,122 @@ ++#!/bin/bash ++PROGNAME="Sganesha-start" ++OPTSPEC="volname:,gd-workdir:" ++VOL= ++declare -i EXPORT_ID ++ganesha_key="ganesha.enable" ++GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha" ++CONF1="$GANESHA_DIR/ganesha.conf" ++GLUSTERD_WORKDIR= ++ ++function parse_args () ++{ ++ ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@) ++ eval set -- "$ARGS" ++ ++ while true; do ++ case $1 in ++ --volname) ++ shift ++ VOL=$1 ++ ;; ++ --gd-workdir) ++ shift ++ GLUSTERD_WORKDIR=$1 ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac ++ shift ++ done ++} ++ ++ ++ ++#This function generates a new export entry as export.volume_name.conf ++function write_conf() ++{ ++echo -e "# WARNING : Using Gluster CLI will overwrite manual ++# changes made to this file. To avoid it, edit the ++# file, copy it over to all the NFS-Ganesha nodes ++# and run ganesha-ha.sh --refresh-config." 
++ ++echo "EXPORT{" ++echo " Export_Id = 2;" ++echo " Path = \"/$VOL\";" ++echo " FSAL {" ++echo " name = \"GLUSTER\";" ++echo " hostname=\"localhost\";" ++echo " volume=\"$VOL\";" ++echo " }" ++echo " Access_type = RW;" ++echo " Disable_ACL = true;" ++echo " Squash=\"No_root_squash\";" ++echo " Pseudo=\"/$VOL\";" ++echo " Protocols = \"3\", \"4\" ;" ++echo " Transports = \"UDP\",\"TCP\";" ++echo " SecType = \"sys\";" ++echo "}" ++} ++ ++#It adds the export dynamically by sending dbus signals ++function export_add() ++{ ++ dbus-send --print-reply --system --dest=org.ganesha.nfsd \ ++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ ++string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)" ++ ++} ++ ++# based on src/scripts/ganeshactl/Ganesha/export_mgr.py ++function is_exported() ++{ ++ local volume="${1}" ++ ++ dbus-send --type=method_call --print-reply --system \ ++ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ ++ org.ganesha.nfsd.exportmgr.ShowExports \ ++ | grep -w -q "/${volume}" ++ ++ return $? ++} ++ ++# Check the info file (contains the volume options) to see if Ganesha is ++# enabled for this volume. ++function ganesha_enabled() ++{ ++ local volume="${1}" ++ local info_file="${GLUSTERD_WORKDIR}/vols/${VOL}/info" ++ local enabled="off" ++ ++ enabled=$(grep -w ${ganesha_key} ${info_file} | cut -d"=" -f2) ++ ++ [ "${enabled}" == "on" ] ++ ++ return $? ++} ++ ++parse_args $@ ++ ++if ganesha_enabled ${VOL} && ! is_exported ${VOL} ++then ++ if [ ! -e ${GANESHA_DIR}/exports/export.${VOL}.conf ] ++ then ++ #Remove export entry from nfs-ganesha.conf ++ sed -i /$VOL.conf/d $CONF1 ++ write_conf ${VOL} > ${GANESHA_DIR}/exports/export.${VOL}.conf ++ EXPORT_ID=`cat $GANESHA_DIR/.export_added` ++ EXPORT_ID=EXPORT_ID+1 ++ echo $EXPORT_ID > $GANESHA_DIR/.export_added ++ sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \ ++ $GANESHA_DIR/exports/export.$VOL.conf ++ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1 ++ else ++ EXPORT_ID=$(grep ^[[:space:]]*Export_Id $GANESHA_DIR/exports/export.$VOL.conf |\ ++ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') ++ fi ++ export_add $VOL ++fi ++ ++exit 0 +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 0d57b49..dd7438c 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -299,7 +299,6 @@ Obsoletes: hekafs + Obsoletes: %{name}-common < %{version}-%{release} + Obsoletes: %{name}-core < %{version}-%{release} + Obsoletes: %{name}-ufo +-Obsoletes: %{name}-ganesha + %if ( 0%{!?_with_gnfs:1} ) + Obsoletes: %{name}-gnfs + %endif +@@ -455,6 +454,30 @@ is in user space and easily manageable. + This package provides support to FUSE based clients and inlcudes the + glusterfs(d) binary. + ++%if ( 0%{!?_without_server:1} ) ++%package ganesha ++Summary: NFS-Ganesha configuration ++Group: Applications/File ++ ++Requires: %{name}-server%{?_isa} = %{version}-%{release} ++Requires: nfs-ganesha-gluster, pcs, dbus ++%if ( 0%{?rhel} && 0%{?rhel} == 6 ) ++Requires: cman, pacemaker, corosync ++%endif ++ ++%description ganesha ++GlusterFS is a distributed file-system capable of scaling to several ++petabytes. It aggregates various storage bricks over Infiniband RDMA ++or TCP/IP interconnect into one large parallel network file ++system. GlusterFS is one of the most sophisticated file systems in ++terms of features and extensibility. It borrows a powerful concept ++called Translators from GNU Hurd kernel. Much of the code in GlusterFS ++is in user space and easily manageable. 
++ ++This package provides the configuration and related files for using ++NFS-Ganesha as the NFS server using GlusterFS ++%endif ++ + %if ( 0%{!?_without_georeplication:1} ) + %package geo-replication + Summary: GlusterFS Geo-replication +@@ -1111,6 +1134,12 @@ exit 0 + %endif + %endif + ++%if ( 0%{?_without_server:1} ) ++#exclude ganesha related files ++%exclude %{_sysconfdir}/ganesha/* ++%exclude %{_libexecdir}/ganesha/* ++%endif ++ + %files api + %exclude %{_libdir}/*.so + # libgfapi files +@@ -1273,6 +1302,12 @@ exit 0 + %exclude %{_datadir}/glusterfs/tests/vagrant + %endif + ++%if ( 0%{!?_without_server:1} ) ++%files ganesha ++%{_sysconfdir}/ganesha/* ++%{_libexecdir}/ganesha/* ++%endif ++ + %if ( 0%{!?_without_ocf:1} ) + %files resource-agents + # /usr/lib is the standard for OCF, also on x86_64 +@@ -1396,6 +1431,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post +@@ -1868,6 +1904,9 @@ fi + %endif + + %changelog ++* Fri Apr 5 2019 Jiffin Tony Thottan ++- Adding ganesha bits back in gluster repository ++ + * Wed Mar 6 2019 Kaleb S. KEITHLEY + - remove unneeded ldconfig in scriptlets + - reported by Igor Gnatenko in Fedora +@@ -1960,9 +1999,6 @@ fi + * Thu Feb 16 2017 Niels de Vos + - Obsolete and Provide python-gluster for upgrading from glusterfs < 3.10 + +-* Tue Feb 7 2017 Kaleb S. KEITHLEY +-- remove ganesha (#1418417) +- + * Wed Feb 1 2017 Poornima G + - Install /var/lib/glusterd/groups/metadata-cache by default + +-- +1.8.3.1 + diff --git a/SOURCES/0054-Revert-glusterd-storhaug-remove-ganesha.patch b/SOURCES/0054-Revert-glusterd-storhaug-remove-ganesha.patch new file mode 100644 index 0000000..261856c --- /dev/null +++ b/SOURCES/0054-Revert-glusterd-storhaug-remove-ganesha.patch @@ -0,0 +1,1912 @@ +From 1029c27982d2f91cb2d3c4fcc19aa5171111dfb9 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Mon, 16 Oct 2017 14:24:29 +0530 +Subject: [PATCH 054/124] Revert "glusterd: (storhaug) remove ganesha" + +This reverts commit 843e1b04b554ab887ec656ae7b468bb93ee4e2f7. 
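Among the pieces this revert restores is the global CLI command wired up below, gluster nfs-ganesha <enable|disable>. Its parser, cli_cmd_ganesha_parse(), packs a fixed option dict that glusterd's stage and commit handlers then consume. A Python sketch of that contract (the function name is illustrative; the keys and values follow the C code below):

    def parse_nfs_ganesha(words):
        # words is the argv tail after "gluster", e.g.
        # ["nfs-ganesha", "enable"].
        if len(words) != 2 or words[0] != "nfs-ganesha":
            raise ValueError("usage: nfs-ganesha <enable|disable>")
        if words[1] not in ("enable", "disable"):
            raise ValueError("invalid global option value")
        return {"key": words[0], "value": words[1],
                "globalname": "All", "hold_global_locks": True}

hold_global_locks appears to be what opts this operation into glusterd's cluster-wide locking, and the ten-minute CLI timeout added in cli-cmd.c reflects that enabling ganesha can stand up a whole pcs cluster.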
+ +Label: DOWNSTREAM ONLY + +Change-Id: I06b5450344c33f26da3d94b6f67051d41dfbba17 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167103 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-global.c | 57 ++ + cli/src/cli-cmd-parser.c | 122 ++- + cli/src/cli-cmd.c | 3 +- + cli/src/cli-rpc-ops.c | 82 ++ + cli/src/cli.h | 4 + + xlators/mgmt/glusterd/src/Makefile.am | 4 +- + xlators/mgmt/glusterd/src/glusterd-errno.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 915 +++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-handler.c | 79 ++ + xlators/mgmt/glusterd/src/glusterd-messages.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 45 +- + .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 196 +++++ + xlators/mgmt/glusterd/src/glusterd-store.h | 2 + + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 37 + + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 + + xlators/mgmt/glusterd/src/glusterd.h | 22 + + 16 files changed, 1568 insertions(+), 11 deletions(-) + create mode 100644 xlators/mgmt/glusterd/src/glusterd-ganesha.c + +diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c +index d0729ac..270b76f 100644 +--- a/cli/src/cli-cmd-global.c ++++ b/cli/src/cli-cmd-global.c +@@ -36,6 +36,10 @@ int + cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word, + const char **words, int wordcount); + ++int ++cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word, ++ const char **words, int wordcount); ++ + struct cli_cmd global_cmds[] = { + { + "global help", +@@ -48,6 +52,11 @@ struct cli_cmd global_cmds[] = { + cli_cmd_get_state_cbk, + "Get local state representation of mentioned daemon", + }, ++ { ++ "nfs-ganesha {enable| disable} ", ++ cli_cmd_ganesha_cbk, ++ "Enable/disable NFS-Ganesha support", ++ }, + {NULL, NULL, NULL}}; + + int +@@ -89,6 +98,54 @@ out: + } + + int ++cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word, ++ const char **words, int wordcount) ++ ++{ ++ int sent = 0; ++ int parse_error = 0; ++ int ret = -1; ++ rpc_clnt_procedure_t *proc = NULL; ++ call_frame_t *frame = NULL; ++ dict_t *options = NULL; ++ cli_local_t *local = NULL; ++ char *op_errstr = NULL; ++ ++ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GANESHA]; ++ ++ frame = create_frame(THIS, THIS->ctx->pool); ++ if (!frame) ++ goto out; ++ ++ ret = cli_cmd_ganesha_parse(state, words, wordcount, &options, &op_errstr); ++ if (ret) { ++ if (op_errstr) { ++ cli_err("%s", op_errstr); ++ GF_FREE(op_errstr); ++ } else ++ cli_usage_out(word->pattern); ++ parse_error = 1; ++ goto out; ++ } ++ ++ CLI_LOCAL_INIT(local, words, frame, options); ++ ++ if (proc->fn) { ++ ret = proc->fn(frame, THIS, options); ++ } ++ ++out: ++ if (ret) { ++ cli_cmd_sent_status_get(&sent); ++ if ((sent == 0) && (parse_error == 0)) ++ cli_out("Setting global option failed"); ++ } ++ ++ CLI_STACK_DESTROY(frame); ++ return ret; ++} ++ ++int + cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word, + const char **words, int wordcount) + { +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index d9ccba1..cd9c445 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1694,7 +1694,7 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + } + } + +- if ((strcmp (key, "cluster.brick-multiplex") == 0)) { ++ if ((strcmp(key, "cluster.brick-multiplex") == 0)) { + question = + "Brick-multiplexing 
is supported only for " + "OCS converged or independent mode. Also it is " +@@ -1703,11 +1703,12 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + "are running before this option is modified." + "Do you still want to continue?"; + +- answer = cli_cmd_get_confirmation (state, question); ++ answer = cli_cmd_get_confirmation(state, question); + if (GF_ANSWER_NO == answer) { +- gf_log ("cli", GF_LOG_ERROR, "Operation " +- "cancelled, exiting"); +- *op_errstr = gf_strdup ("Aborted by user."); ++ gf_log("cli", GF_LOG_ERROR, ++ "Operation " ++ "cancelled, exiting"); ++ *op_errstr = gf_strdup("Aborted by user."); + ret = -1; + goto out; + } +@@ -5848,3 +5849,114 @@ out: + + return ret; + } ++ ++/* Parsing global option for NFS-Ganesha config ++ * gluster nfs-ganesha enable/disable */ ++ ++int32_t ++cli_cmd_ganesha_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, char **op_errstr) ++{ ++ dict_t *dict = NULL; ++ int ret = -1; ++ char *key = NULL; ++ char *value = NULL; ++ char *w = NULL; ++ char *opwords[] = {"enable", "disable", NULL}; ++ const char *question = NULL; ++ gf_answer_t answer = GF_ANSWER_NO; ++ ++ GF_ASSERT(words); ++ GF_ASSERT(options); ++ ++ dict = dict_new(); ++ ++ if (!dict) ++ goto out; ++ ++ if (wordcount != 2) ++ goto out; ++ ++ key = (char *)words[0]; ++ value = (char *)words[1]; ++ ++ if (!key || !value) { ++ cli_out("Usage : nfs-ganesha "); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = gf_strip_whitespace(value, strlen(value)); ++ if (ret == -1) ++ goto out; ++ ++ if (strcmp(key, "nfs-ganesha")) { ++ gf_asprintf(op_errstr, ++ "Global option: error: ' %s '" ++ "is not a valid global option.", ++ key); ++ ret = -1; ++ goto out; ++ } ++ ++ w = str_getunamb(value, opwords); ++ if (!w) { ++ cli_out( ++ "Invalid global option \n" ++ "Usage : nfs-ganesha "); ++ ret = -1; ++ goto out; ++ } ++ ++ question = ++ "Enabling NFS-Ganesha requires Gluster-NFS to be" ++ " disabled across the trusted pool. Do you " ++ "still want to continue?\n"; ++ ++ if (strcmp(value, "enable") == 0) { ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "Global operation " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } ++ cli_out("This will take a few minutes to complete. 
Please wait .."); ++ ++ ret = dict_set_str(dict, "key", key); ++ if (ret) { ++ gf_log(THIS->name, GF_LOG_ERROR, "dict set on key failed"); ++ goto out; ++ } ++ ++ ret = dict_set_str(dict, "value", value); ++ if (ret) { ++ gf_log(THIS->name, GF_LOG_ERROR, "dict set on value failed"); ++ goto out; ++ } ++ ++ ret = dict_set_str(dict, "globalname", "All"); ++ if (ret) { ++ gf_log(THIS->name, GF_LOG_ERROR, ++ "dict set on global" ++ " key failed."); ++ goto out; ++ } ++ ++ ret = dict_set_int32(dict, "hold_global_locks", _gf_true); ++ if (ret) { ++ gf_log(THIS->name, GF_LOG_ERROR, ++ "dict set on global key " ++ "failed."); ++ goto out; ++ } ++ ++ *options = dict; ++out: ++ if (ret) ++ dict_unref(dict); ++ ++ return ret; ++} +diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c +index 2ee8b1b..8c06905 100644 +--- a/cli/src/cli-cmd.c ++++ b/cli/src/cli-cmd.c +@@ -366,7 +366,8 @@ cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame, + unsigned timeout = 0; + + if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) || +- (GLUSTER_CLI_HEAL_VOLUME == procnum)) ++ (GLUSTER_CLI_HEAL_VOLUME == procnum) || ++ (GLUSTER_CLI_GANESHA == procnum)) + timeout = cli_ten_minutes_timeout; + else + timeout = cli_default_conn_timeout; +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 12e7fcc..736cd18 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -2207,6 +2207,62 @@ out: + return ret; + } + ++int ++gf_cli_ganesha_cbk(struct rpc_req *req, struct iovec *iov, int count, ++ void *myframe) ++{ ++ gf_cli_rsp rsp = { ++ 0, ++ }; ++ int ret = -1; ++ dict_t *dict = NULL; ++ ++ GF_ASSERT(myframe); ++ ++ if (-1 == req->rpc_status) { ++ goto out; ++ } ++ ++ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp); ++ if (ret < 0) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "Failed to decode xdr response"); ++ goto out; ++ } ++ ++ gf_log("cli", GF_LOG_DEBUG, "Received resp to ganesha"); ++ ++ dict = dict_new(); ++ ++ if (!dict) { ++ ret = -1; ++ goto out; ++ } ++ ++ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict); ++ if (ret) ++ goto out; ++ ++ if (rsp.op_ret) { ++ if (strcmp(rsp.op_errstr, "")) ++ cli_err("nfs-ganesha: failed: %s", rsp.op_errstr); ++ else ++ cli_err("nfs-ganesha: failed"); ++ } ++ ++ else { ++ cli_out("nfs-ganesha : success "); ++ } ++ ++ ret = rsp.op_ret; ++ ++out: ++ if (dict) ++ dict_unref(dict); ++ cli_cmd_broadcast_response(ret); ++ return ret; ++} ++ + char * + is_server_debug_xlator(void *myframe) + { +@@ -4880,6 +4936,31 @@ out: + } + + int32_t ++gf_cli_ganesha(call_frame_t *frame, xlator_t *this, void *data) ++{ ++ gf_cli_req req = {{ ++ 0, ++ }}; ++ int ret = 0; ++ dict_t *dict = NULL; ++ ++ if (!frame || !this || !data) { ++ ret = -1; ++ goto out; ++ } ++ ++ dict = data; ++ ++ ret = cli_to_glusterd(&req, frame, gf_cli_ganesha_cbk, ++ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_GANESHA, ++ this, cli_rpc_prog, NULL); ++out: ++ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret); ++ ++ return ret; ++} ++ ++int32_t + gf_cli_set_volume(call_frame_t *frame, xlator_t *this, void *data) + { + gf_cli_req req = {{ +@@ -12214,6 +12295,7 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = { + [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", gf_cli_sys_exec}, + [GLUSTER_CLI_SNAP] = {"SNAP", gf_cli_snapshot}, + [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume}, ++ [GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha}, + [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt}, + 
[GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot}, + [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", gf_cli_attach_tier}, +diff --git a/cli/src/cli.h b/cli/src/cli.h +index b79a0a2..37e4d9d 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -282,6 +282,10 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + int wordcount, dict_t **options, char **op_errstr); + + int32_t ++cli_cmd_ganesha_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, char **op_errstr); ++ ++int32_t + cli_cmd_get_state_parse(struct cli_state *state, const char **words, + int wordcount, dict_t **options, char **op_errstr); + +diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am +index c8dd8e3..5fe5156 100644 +--- a/xlators/mgmt/glusterd/src/Makefile.am ++++ b/xlators/mgmt/glusterd/src/Makefile.am +@@ -10,7 +10,7 @@ glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL) + glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ + glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \ + glusterd-store.c glusterd-handshake.c glusterd-pmap.c \ +- glusterd-volgen.c glusterd-rebalance.c \ ++ glusterd-volgen.c glusterd-rebalance.c glusterd-ganesha.c \ + glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \ + glusterd-replace-brick.c glusterd-log-ops.c glusterd-tier.c \ + glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ +@@ -52,6 +52,8 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(CONTRIBDIR)/mount -I$(CONTRIBDIR)/userspace-rcu \ + -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \ + -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" \ ++ -DCONFDIR=\"$(localstatedir)/run/gluster/shared_storage/nfs-ganesha\" \ ++ -DGANESHA_PREFIX=\"$(libexecdir)/ganesha\" \ + -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) + + +diff --git a/xlators/mgmt/glusterd/src/glusterd-errno.h b/xlators/mgmt/glusterd/src/glusterd-errno.h +index 7e1575b..c74070e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-errno.h ++++ b/xlators/mgmt/glusterd/src/glusterd-errno.h +@@ -27,7 +27,7 @@ enum glusterd_op_errno { + EG_ISSNAP = 30813, /* Volume is a snap volume */ + EG_GEOREPRUN = 30814, /* Geo-Replication is running */ + EG_NOTTHINP = 30815, /* Bricks are not thinly provisioned */ +- EG_NOGANESHA = 30816, /* obsolete ganesha is not enabled */ ++ EG_NOGANESHA = 30816, /* Global ganesha is not enabled */ + }; + + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +new file mode 100644 +index 0000000..fac16e6 +--- /dev/null ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -0,0 +1,915 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++ ++#include ++#include "glusterd.h" ++#include "glusterd-op-sm.h" ++#include "glusterd-store.h" ++#include "glusterd-utils.h" ++#include "glusterd-nfs-svc.h" ++#include "glusterd-volgen.h" ++#include "glusterd-messages.h" ++#include ++ ++#include ++ ++int ++start_ganesha(char **op_errstr); ++ ++typedef struct service_command { ++ char *binary; ++ char *service; ++ int (*action)(struct service_command *, char *); ++} service_command; ++ ++/* parsing_ganesha_ha_conf will allocate the returned string ++ * to be freed (GF_FREE) by the caller ++ * return NULL if error or not found */ ++static char * ++parsing_ganesha_ha_conf(const char *key) ++{ ++#define MAX_LINE 1024 ++ char scratch[MAX_LINE * 2] = { ++ 0, ++ }; ++ char *value = NULL, *pointer = NULL, *end_pointer = NULL; ++ FILE *fp; ++ ++ fp = fopen(GANESHA_HA_CONF, "r"); ++ if (fp == NULL) { ++ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, ++ "couldn't open the file %s", GANESHA_HA_CONF); ++ goto end_ret; ++ } ++ while ((pointer = fgets(scratch, MAX_LINE, fp)) != NULL) { ++ /* Read config file until we get matching "^[[:space:]]*key" */ ++ if (*pointer == '#') { ++ continue; ++ } ++ while (isblank(*pointer)) { ++ pointer++; ++ } ++ if (strncmp(pointer, key, strlen(key))) { ++ continue; ++ } ++ pointer += strlen(key); ++ /* key found : if we fail to parse, we'll return an error ++ * rather than trying next one ++ * - supposition : conf file is bash compatible : no space ++ * around the '=' */ ++ if (*pointer != '=') { ++ gf_msg(THIS->name, GF_LOG_ERROR, errno, ++ GD_MSG_GET_CONFIG_INFO_FAILED, "Parsing %s failed at key %s", ++ GANESHA_HA_CONF, key); ++ goto end_close; ++ } ++ pointer++; /* jump the '=' */ ++ ++ if (*pointer == '"' || *pointer == '\'') { ++ /* dont get the quote */ ++ pointer++; ++ } ++ end_pointer = pointer; ++ /* stop at the next closing quote or blank/newline */ ++ do { ++ end_pointer++; ++ } while (!(*end_pointer == '\'' || *end_pointer == '"' || ++ isspace(*end_pointer) || *end_pointer == '\0')); ++ *end_pointer = '\0'; ++ ++ /* got it. 
copy it and return */ ++ value = gf_strdup(pointer); ++ break; ++ } ++ ++end_close: ++ fclose(fp); ++end_ret: ++ return value; ++} ++ ++static int ++sc_systemctl_action(struct service_command *sc, char *command) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ ++ runinit(&runner); ++ runner_add_args(&runner, sc->binary, command, sc->service, NULL); ++ return runner_run(&runner); ++} ++ ++static int ++sc_service_action(struct service_command *sc, char *command) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ ++ runinit(&runner); ++ runner_add_args(&runner, sc->binary, sc->service, command, NULL); ++ return runner_run(&runner); ++} ++ ++static int ++manage_service(char *action) ++{ ++ struct stat stbuf = { ++ 0, ++ }; ++ int i = 0; ++ int ret = 0; ++ struct service_command sc_list[] = {{.binary = "/usr/bin/systemctl", ++ .service = "nfs-ganesha", ++ .action = sc_systemctl_action}, ++ {.binary = "/sbin/invoke-rc.d", ++ .service = "nfs-ganesha", ++ .action = sc_service_action}, ++ {.binary = "/sbin/service", ++ .service = "nfs-ganesha", ++ .action = sc_service_action}, ++ {.binary = NULL}}; ++ ++ while (sc_list[i].binary != NULL) { ++ ret = sys_stat(sc_list[i].binary, &stbuf); ++ if (ret == 0) { ++ gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary); ++ if (strcmp(sc_list[i].binary, "/usr/bin/systemctl") == 0) ++ ret = sc_systemctl_action(&sc_list[i], action); ++ else ++ ret = sc_service_action(&sc_list[i], action); ++ ++ return ret; ++ } ++ i++; ++ } ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNRECOGNIZED_SVC_MNGR, ++ "Could not %s NFS-Ganesha.Service manager for distro" ++ " not recognized.", ++ action); ++ return ret; ++} ++ ++/* ++ * Check if the cluster is a ganesha cluster or not * ++ */ ++gf_boolean_t ++glusterd_is_ganesha_cluster() ++{ ++ int ret = -1; ++ glusterd_conf_t *priv = NULL; ++ xlator_t *this = NULL; ++ gf_boolean_t ret_bool = _gf_false; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("ganesha", this, out); ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ ++ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, ++ _gf_false); ++ if (ret == _gf_true) { ++ ret_bool = _gf_true; ++ gf_msg_debug(this->name, 0, "nfs-ganesha is enabled for the cluster"); ++ } else ++ gf_msg_debug(this->name, 0, "nfs-ganesha is disabled for the cluster"); ++ ++out: ++ return ret_bool; ++} ++ ++/* Check if ganesha.enable is set to 'on', that checks if ++ * a particular volume is exported via NFS-Ganesha */ ++gf_boolean_t ++glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo) ++{ ++ char *value = NULL; ++ gf_boolean_t is_exported = _gf_false; ++ int ret = 0; ++ ++ ret = glusterd_volinfo_get(volinfo, "ganesha.enable", &value); ++ if ((ret == 0) && value) { ++ if (strcmp(value, "on") == 0) { ++ gf_msg_debug(THIS->name, 0, ++ "ganesha.enable set" ++ " to %s", ++ value); ++ is_exported = _gf_true; ++ } ++ } ++ return is_exported; ++} ++ ++/* * ++ * The below function is called as part of commit phase for volume set option ++ * "ganesha.enable". If the value is "on", it creates export configuration file ++ * and then export the volume via dbus command. 
Incase of "off", the volume ++ * will be already unexported during stage phase, so it will remove the conf ++ * file from shared storage ++ */ ++int ++glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict) ++{ ++ int ret = 0; ++ char *volname = NULL; ++ ++ GF_ASSERT(key); ++ GF_ASSERT(value); ++ GF_ASSERT(dict); ++ ++ if ((strcmp(key, "ganesha.enable") == 0)) { ++ if ((strcmp(value, "on")) && (strcmp(value, "off"))) { ++ gf_asprintf(errstr, ++ "Invalid value" ++ " for volume set command. Use on/off only."); ++ ret = -1; ++ goto out; ++ } ++ if (strcmp(value, "on") == 0) { ++ ret = glusterd_handle_ganesha_op(dict, errstr, key, value); ++ ++ } else if (is_origin_glusterd(dict)) { ++ ret = dict_get_str(dict, "volname", &volname); ++ if (ret) { ++ gf_msg("glusterd-ganesha", GF_LOG_ERROR, errno, ++ GD_MSG_DICT_GET_FAILED, "Unable to get volume name"); ++ goto out; ++ } ++ ret = manage_export_config(volname, "off", errstr); ++ } ++ } ++out: ++ if (ret) { ++ gf_msg("glusterd-ganesha", GF_LOG_ERROR, 0, ++ GD_MSG_NFS_GNS_OP_HANDLE_FAIL, ++ "Handling NFS-Ganesha" ++ " op failed."); ++ } ++ return ret; ++} ++ ++int ++glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) ++{ ++ int ret = -1; ++ int value = -1; ++ gf_boolean_t option = _gf_false; ++ char *str = NULL; ++ glusterd_conf_t *priv = NULL; ++ xlator_t *this = NULL; ++ ++ GF_ASSERT(dict); ++ this = THIS; ++ GF_ASSERT(this); ++ priv = this->private; ++ GF_ASSERT(priv); ++ ++ value = dict_get_str_boolean(dict, "value", _gf_false); ++ if (value == -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "value not present."); ++ goto out; ++ } ++ /* This dict_get will fail if the user had never set the key before */ ++ /*Ignoring the ret value and proceeding */ ++ ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str); ++ if (ret == -1) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_GET_FAILED, ++ "Global dict not present."); ++ ret = 0; ++ goto out; ++ } ++ /* Validity of the value is already checked */ ++ ret = gf_string2boolean(str, &option); ++ /* Check if the feature is already enabled, fail in that case */ ++ if (value == option) { ++ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", str); ++ ret = -1; ++ goto out; ++ } ++ ++ if (value) { ++ ret = start_ganesha(op_errstr); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL, ++ "Could not start NFS-Ganesha"); ++ } ++ } else { ++ ret = stop_ganesha(op_errstr); ++ if (ret) ++ gf_msg_debug(THIS->name, 0, ++ "Could not stop " ++ "NFS-Ganesha."); ++ } ++ ++out: ++ ++ if (ret) { ++ if (!(*op_errstr)) { ++ *op_errstr = gf_strdup("Error, Validation Failed"); ++ gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s", ++ GLUSTERD_STORE_KEY_GANESHA_GLOBAL); ++ } else { ++ gf_msg_debug(this->name, 0, "Error, Cannot Validate option"); ++ } ++ } ++ return ret; ++} ++ ++int ++glusterd_op_set_ganesha(dict_t *dict, char **errstr) ++{ ++ int ret = 0; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ char *key = NULL; ++ char *value = NULL; ++ char *next_version = NULL; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ GF_ASSERT(dict); ++ ++ priv = this->private; ++ GF_ASSERT(priv); ++ ++ ret = dict_get_str(dict, "key", &key); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Couldn't get key in global option set"); ++ goto out; ++ } ++ ++ ret = dict_get_str(dict, "value", &value); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, 
GD_MSG_DICT_GET_FAILED, ++ "Couldn't get value in global option set"); ++ goto out; ++ } ++ ++ ret = glusterd_handle_ganesha_op(dict, errstr, key, value); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_SETUP_FAIL, ++ "Initial NFS-Ganesha set up failed"); ++ ret = -1; ++ goto out; ++ } ++ ret = dict_set_dynstr_with_alloc(priv->opts, ++ GLUSTERD_STORE_KEY_GANESHA_GLOBAL, value); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set" ++ " nfs-ganesha in dict."); ++ goto out; ++ } ++ ret = glusterd_get_next_global_opt_version_str(priv->opts, &next_version); ++ if (ret) { ++ gf_msg_debug(THIS->name, 0, ++ "Could not fetch " ++ " global op version"); ++ goto out; ++ } ++ ret = dict_set_str(priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, next_version); ++ if (ret) ++ goto out; ++ ++ ret = glusterd_store_options(this, priv->opts); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL, ++ "Failed to store options"); ++ goto out; ++ } ++ ++out: ++ gf_msg_debug(this->name, 0, "returning %d", ret); ++ return ret; ++} ++ ++/* Following function parse GANESHA_HA_CONF ++ * The sample file looks like below, ++ * HA_NAME="ganesha-ha-360" ++ * HA_VOL_NAME="ha-state" ++ * HA_CLUSTER_NODES="server1,server2" ++ * VIP_rhs_1="10.x.x.x" ++ * VIP_rhs_2="10.x.x.x." */ ++ ++/* Check if the localhost is listed as one of nfs-ganesha nodes */ ++gf_boolean_t ++check_host_list(void) ++{ ++ glusterd_conf_t *priv = NULL; ++ char *hostname, *hostlist; ++ gf_boolean_t ret = _gf_false; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ priv = THIS->private; ++ GF_ASSERT(priv); ++ ++ hostlist = parsing_ganesha_ha_conf("HA_CLUSTER_NODES"); ++ if (hostlist == NULL) { ++ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_CONFIG_INFO_FAILED, ++ "couldn't get HA_CLUSTER_NODES from file %s", GANESHA_HA_CONF); ++ return _gf_false; ++ } ++ ++ /* Hostlist is a comma separated list now */ ++ hostname = strtok(hostlist, ","); ++ while (hostname != NULL) { ++ ret = gf_is_local_addr(hostname); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NFS_GNS_HOST_FOUND, ++ "ganesha host found " ++ "Hostname is %s", ++ hostname); ++ break; ++ } ++ hostname = strtok(NULL, ","); ++ } ++ ++ GF_FREE(hostlist); ++ return ret; ++} ++ ++int ++manage_export_config(char *volname, char *value, char **op_errstr) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ int ret = -1; ++ ++ GF_ASSERT(volname); ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/create-export-ganesha.sh", ++ CONFDIR, value, volname, NULL); ++ ret = runner_run(&runner); ++ ++ if (ret) ++ gf_asprintf(op_errstr, ++ "Failed to create" ++ " NFS-Ganesha export config file."); ++ ++ return ret; ++} ++ ++/* Exports and unexports a particular volume via NFS-Ganesha */ ++int ++ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ int ret = -1; ++ glusterd_volinfo_t *volinfo = NULL; ++ dict_t *vol_opts = NULL; ++ char *volname = NULL; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ gf_boolean_t option = _gf_false; ++ ++ runinit(&runner); ++ this = THIS; ++ GF_ASSERT(this); ++ priv = this->private; ++ ++ GF_ASSERT(value); ++ GF_ASSERT(dict); ++ GF_ASSERT(priv); ++ ++ ret = dict_get_str(dict, "volname", &volname); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Unable to get volume name"); ++ goto out; ++ } ++ ret = gf_string2boolean(value, &option); ++ if (ret == -1) { ++ gf_msg(this->name, 
GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, ++ "invalid value."); ++ goto out; ++ } ++ ++ ret = glusterd_volinfo_find(volname, &volinfo); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, ++ FMTSTR_CHECK_VOL_EXISTS, volname); ++ goto out; ++ } ++ ++ ret = glusterd_check_ganesha_export(volinfo); ++ if (ret && option) { ++ gf_asprintf(op_errstr, ++ "ganesha.enable " ++ "is already 'on'."); ++ ret = -1; ++ goto out; ++ ++ } else if (!option && !ret) { ++ gf_asprintf(op_errstr, ++ "ganesha.enable " ++ "is already 'off'."); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Check if global option is enabled, proceed only then */ ++ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, ++ _gf_false); ++ if (ret == -1) { ++ gf_msg_debug(this->name, 0, ++ "Failed to get " ++ "global option dict."); ++ gf_asprintf(op_errstr, ++ "The option " ++ "nfs-ganesha should be " ++ "enabled before setting ganesha.enable."); ++ goto out; ++ } ++ if (!ret) { ++ gf_asprintf(op_errstr, ++ "The option " ++ "nfs-ganesha should be " ++ "enabled before setting ganesha.enable."); ++ ret = -1; ++ goto out; ++ } ++ ++ /* * ++ * Create the export file from the node where ganesha.enable "on" ++ * is executed ++ * */ ++ if (option) { ++ ret = manage_export_config(volname, "on", op_errstr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL, ++ "Failed to create" ++ "export file for NFS-Ganesha\n"); ++ goto out; ++ } ++ } ++ ++ if (check_host_list()) { ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/dbus-send.sh", CONFDIR, ++ value, volname, NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "Dynamic export" ++ " addition/deletion failed." ++ " Please see log file for details"); ++ goto out; ++ } ++ } ++ ++ vol_opts = volinfo->dict; ++ ret = dict_set_dynstr_with_alloc(vol_opts, "features.cache-invalidation", ++ value); ++ if (ret) ++ gf_asprintf(op_errstr, ++ "Cache-invalidation could not" ++ " be set to %s.", ++ value); ++ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) ++ gf_asprintf(op_errstr, "failed to store volinfo for %s", ++ volinfo->volname); ++ ++out: ++ return ret; ++} ++ ++int ++tear_down_cluster(gf_boolean_t run_teardown) ++{ ++ int ret = 0; ++ runner_t runner = { ++ 0, ++ }; ++ struct stat st = { ++ 0, ++ }; ++ DIR *dir = NULL; ++ struct dirent *entry = NULL; ++ struct dirent scratch[2] = { ++ { ++ 0, ++ }, ++ }; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ ++ if (run_teardown) { ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ "teardown", CONFDIR, NULL); ++ ret = runner_run(&runner); ++ /* * ++ * Remove all the entries in CONFDIR expect ganesha.conf and ++ * ganesha-ha.conf ++ */ ++ dir = sys_opendir(CONFDIR); ++ if (!dir) { ++ gf_msg_debug(THIS->name, 0, ++ "Failed to open directory %s. 
" ++ "Reason : %s", ++ CONFDIR, strerror(errno)); ++ ret = 0; ++ goto out; ++ } ++ ++ GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); ++ while (entry) { ++ snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name); ++ ret = sys_lstat(path, &st); ++ if (ret == -1) { ++ gf_msg_debug(THIS->name, 0, ++ "Failed to stat entry %s :" ++ " %s", ++ path, strerror(errno)); ++ goto out; ++ } ++ ++ if (strcmp(entry->d_name, "ganesha.conf") == 0 || ++ strcmp(entry->d_name, "ganesha-ha.conf") == 0) ++ gf_msg_debug(THIS->name, 0, ++ " %s is not required" ++ " to remove", ++ path); ++ else if (S_ISDIR(st.st_mode)) ++ ret = recursive_rmdir(path); ++ else ++ ret = sys_unlink(path); ++ ++ if (ret) { ++ gf_msg_debug(THIS->name, 0, ++ " Failed to remove %s. " ++ "Reason : %s", ++ path, strerror(errno)); ++ } ++ ++ gf_msg_debug(THIS->name, 0, "%s %s", ++ ret ? "Failed to remove" : "Removed", entry->d_name); ++ GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); ++ } ++ ++ ret = sys_closedir(dir); ++ if (ret) { ++ gf_msg_debug(THIS->name, 0, ++ "Failed to close dir %s. Reason :" ++ " %s", ++ CONFDIR, strerror(errno)); ++ } ++ } ++ ++out: ++ return ret; ++} ++ ++int ++setup_cluster(gf_boolean_t run_setup) ++{ ++ int ret = 0; ++ runner_t runner = { ++ 0, ++ }; ++ ++ if (run_setup) { ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "setup", ++ CONFDIR, NULL); ++ ret = runner_run(&runner); ++ } ++ return ret; ++} ++ ++static int ++teardown(gf_boolean_t run_teardown, char **op_errstr) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ int ret = 1; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *priv = NULL; ++ dict_t *vol_opts = NULL; ++ ++ priv = THIS->private; ++ ++ ret = tear_down_cluster(run_teardown); ++ if (ret == -1) { ++ gf_asprintf(op_errstr, ++ "Cleanup of NFS-Ganesha" ++ " HA config failed."); ++ goto out; ++ } ++ ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "cleanup", ++ CONFDIR, NULL); ++ ret = runner_run(&runner); ++ if (ret) ++ gf_msg_debug(THIS->name, 0, ++ "Could not clean up" ++ " NFS-Ganesha related config"); ++ ++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) ++ { ++ vol_opts = volinfo->dict; ++ /* All the volumes exported via NFS-Ganesha will be ++ unexported, hence setting the appropriate keys */ ++ ret = dict_set_str(vol_opts, "features.cache-invalidation", "off"); ++ if (ret) ++ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, ++ "Could not set features.cache-invalidation " ++ "to off for %s", ++ volinfo->volname); ++ ++ ret = dict_set_str(vol_opts, "ganesha.enable", "off"); ++ if (ret) ++ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, ++ "Could not set ganesha.enable to off for %s", ++ volinfo->volname); ++ ++ ret = glusterd_store_volinfo(volinfo, ++ GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL, ++ "failed to store volinfo for %s", volinfo->volname); ++ } ++out: ++ return ret; ++} ++ ++int ++stop_ganesha(char **op_errstr) ++{ ++ int ret = 0; ++ runner_t runner = { ++ 0, ++ }; ++ ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ "--setup-ganesha-conf-files", CONFDIR, "no", NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "removal of symlink ganesha.conf " ++ "in /etc/ganesha failed"); ++ } ++ ++ if (check_host_list()) { ++ ret = manage_service("stop"); ++ if (ret) ++ gf_asprintf(op_errstr, ++ "NFS-Ganesha service could not" ++ 
"be stopped."); ++ } ++ return ret; ++} ++ ++int ++start_ganesha(char **op_errstr) ++{ ++ int ret = -1; ++ dict_t *vol_opts = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *priv = NULL; ++ runner_t runner = { ++ 0, ++ }; ++ ++ priv = THIS->private; ++ GF_ASSERT(priv); ++ ++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) ++ { ++ vol_opts = volinfo->dict; ++ /* Gluster-nfs has to be disabled across the trusted pool */ ++ /* before attempting to start nfs-ganesha */ ++ ret = dict_set_str(vol_opts, NFS_DISABLE_MAP_KEY, "on"); ++ if (ret) ++ goto out; ++ ++ ret = glusterd_store_volinfo(volinfo, ++ GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) { ++ *op_errstr = gf_strdup( ++ "Failed to store the " ++ "Volume information"); ++ goto out; ++ } ++ } ++ ++ /* If the nfs svc is not initialized it means that the service is not ++ * running, hence we can skip the process of stopping gluster-nfs ++ * service ++ */ ++ if (priv->nfs_svc.inited) { ++ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL); ++ if (ret) { ++ ret = -1; ++ gf_asprintf(op_errstr, ++ "Gluster-NFS service could" ++ "not be stopped, exiting."); ++ goto out; ++ } ++ } ++ ++ if (check_host_list()) { ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ "--setup-ganesha-conf-files", CONFDIR, "yes", NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "creation of symlink ganesha.conf " ++ "in /etc/ganesha failed"); ++ goto out; ++ } ++ ret = manage_service("start"); ++ if (ret) ++ gf_asprintf(op_errstr, ++ "NFS-Ganesha failed to start." ++ "Please see log file for details"); ++ } ++ ++out: ++ return ret; ++} ++ ++static int ++pre_setup(gf_boolean_t run_setup, char **op_errstr) ++{ ++ int ret = 0; ++ ++ ret = check_host_list(); ++ ++ if (ret) { ++ ret = setup_cluster(run_setup); ++ if (ret == -1) ++ gf_asprintf(op_errstr, ++ "Failed to set up HA " ++ "config for NFS-Ganesha. " ++ "Please check the log file for details"); ++ } ++ ++ return ret; ++} ++ ++int ++glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key, ++ char *value) ++{ ++ int32_t ret = -1; ++ gf_boolean_t option = _gf_false; ++ ++ GF_ASSERT(dict); ++ GF_ASSERT(op_errstr); ++ GF_ASSERT(key); ++ GF_ASSERT(value); ++ ++ if (strcmp(key, "ganesha.enable") == 0) { ++ ret = ganesha_manage_export(dict, value, op_errstr); ++ if (ret < 0) ++ goto out; ++ } ++ ++ /* It is possible that the key might not be set */ ++ ret = gf_string2boolean(value, &option); ++ if (ret == -1) { ++ gf_asprintf(op_errstr, "Invalid value in key-value pair."); ++ goto out; ++ } ++ ++ if (strcmp(key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) == 0) { ++ /* * ++ * The set up/teardown of pcs cluster should be performed only ++ * once. This will done on the node in which the cli command ++ * 'gluster nfs-ganesha ' got executed. 
So that ++ * node should part of ganesha HA cluster ++ */ ++ if (option) { ++ ret = pre_setup(is_origin_glusterd(dict), op_errstr); ++ if (ret < 0) ++ goto out; ++ } else { ++ ret = teardown(is_origin_glusterd(dict), op_errstr); ++ if (ret < 0) ++ goto out; ++ } ++ } ++ ++out: ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index de44af7..528993c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -1911,6 +1911,83 @@ glusterd_op_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + return ret; + } + ++int ++__glusterd_handle_ganesha_cmd(rpcsvc_request_t *req) ++{ ++ int32_t ret = -1; ++ gf_cli_req cli_req = {{ ++ 0, ++ }}; ++ dict_t *dict = NULL; ++ glusterd_op_t cli_op = GD_OP_GANESHA; ++ char *op_errstr = NULL; ++ char err_str[2048] = { ++ 0, ++ }; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ GF_ASSERT(req); ++ ++ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); ++ if (ret < 0) { ++ snprintf(err_str, sizeof(err_str), ++ "Failed to decode " ++ "request received from cli"); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", ++ err_str); ++ req->rpc_err = GARBAGE_ARGS; ++ goto out; ++ } ++ ++ if (cli_req.dict.dict_len) { ++ /* Unserialize the dictionary */ ++ dict = dict_new(); ++ if (!dict) { ++ ret = -1; ++ goto out; ++ } ++ ++ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, ++ &dict); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, ++ "failed to " ++ "unserialize req-buffer to dictionary"); ++ snprintf(err_str, sizeof(err_str), ++ "Unable to decode " ++ "the command"); ++ goto out; ++ } else { ++ dict->extra_stdfree = cli_req.dict.dict_val; ++ } ++ } ++ ++ gf_msg_trace(this->name, 0, "Received global option request"); ++ ++ ret = glusterd_op_begin_synctask(req, GD_OP_GANESHA, dict); ++out: ++ if (ret) { ++ if (err_str[0] == '\0') ++ snprintf(err_str, sizeof(err_str), "Operation failed"); ++ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); ++ } ++ if (op_errstr) ++ GF_FREE(op_errstr); ++ if (dict) ++ dict_unref(dict); ++ ++ return ret; ++} ++ ++int ++glusterd_handle_ganesha_cmd(rpcsvc_request_t *req) ++{ ++ return glusterd_big_locked_handler(req, __glusterd_handle_ganesha_cmd); ++} ++ + static int + __glusterd_handle_reset_volume(rpcsvc_request_t *req) + { +@@ -6644,6 +6721,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = { + [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME", + GLUSTER_CLI_BARRIER_VOLUME, + glusterd_handle_barrier, NULL, 0, DRC_NA}, ++ [GLUSTER_CLI_GANESHA] = {"GANESHA", GLUSTER_CLI_GANESHA, ++ glusterd_handle_ganesha_cmd, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", GLUSTER_CLI_GET_VOL_OPT, + glusterd_handle_get_vol_opt, NULL, 0, DRC_NA}, + [GLUSTER_CLI_BITROT] = {"BITROT", GLUSTER_CLI_BITROT, +diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h +index 1a4bd54..9558480 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-messages.h ++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h +@@ -297,7 +297,7 @@ GLFS_MSGID( + GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL, + GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE, + GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL, +- GD_MSG_MANAGER_FUNCTION_FAILED, ++ GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED, + 
GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL); + + #endif /* !_GLUSTERD_MESSAGES_H_ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 12d857a..a630c48 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -1176,6 +1176,13 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr) + if (ret) + goto out; + ++ if ((strcmp(key, "ganesha.enable") == 0) && ++ (strcmp(value, "off") == 0)) { ++ ret = ganesha_manage_export(dict, "off", op_errstr); ++ if (ret) ++ goto out; ++ } ++ + ret = glusterd_check_quota_cmd(key, value, errstr, sizeof(errstr)); + if (ret) + goto out; +@@ -1677,6 +1684,20 @@ glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr) + goto out; + } + ++ /* * ++ * If key ganesha.enable is set, then volume should be unexported from ++ * ganesha server. Also it is a volume-level option, perform only when ++ * volume name not equal to "all"(in other words if volinfo != NULL) ++ */ ++ if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) { ++ if (glusterd_check_ganesha_export(volinfo)) { ++ ret = ganesha_manage_export(dict, "off", op_errstr); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL, ++ "Could not reset ganesha.enable key"); ++ } ++ } ++ + if (strcmp(key, "all")) { + exists = glusterd_check_option_exists(key, &key_fixed); + if (exists == -1) { +@@ -2393,6 +2414,15 @@ glusterd_op_reset_volume(dict_t *dict, char **op_rspstr) + } + } + ++ if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) { ++ if (glusterd_check_ganesha_export(volinfo)) { ++ ret = manage_export_config(volname, "off", op_rspstr); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL, ++ "Could not reset ganesha.enable key"); ++ } ++ } ++ + out: + GF_FREE(key_fixed); + if (quorum_action) +@@ -2964,6 +2994,10 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + } + } + ++ ret = glusterd_check_ganesha_cmd(key, value, errstr, dict); ++ if (ret == -1) ++ goto out; ++ + if (!is_key_glusterd_hooks_friendly(key)) { + ret = glusterd_check_option_exists(key, &key_fixed); + GF_ASSERT(ret); +@@ -4494,7 +4528,8 @@ glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx) + + case GD_OP_SYNC_VOLUME: + case GD_OP_COPY_FILE: +- case GD_OP_SYS_EXEC: { ++ case GD_OP_SYS_EXEC: ++ case GD_OP_GANESHA: { + dict_copy(dict, req_dict); + } break; + +@@ -5944,6 +5979,10 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr, + ret = glusterd_op_stage_set_volume(dict, op_errstr); + break; + ++ case GD_OP_GANESHA: ++ ret = glusterd_op_stage_set_ganesha(dict, op_errstr); ++ break; ++ + case GD_OP_RESET_VOLUME: + ret = glusterd_op_stage_reset_volume(dict, op_errstr); + break; +@@ -6074,7 +6113,9 @@ glusterd_op_commit_perform(glusterd_op_t op, dict_t *dict, char **op_errstr, + case GD_OP_SET_VOLUME: + ret = glusterd_op_set_volume(dict, op_errstr); + break; +- ++ case GD_OP_GANESHA: ++ ret = glusterd_op_set_ganesha(dict, op_errstr); ++ break; + case GD_OP_RESET_VOLUME: + ret = glusterd_op_reset_volume(dict, op_errstr); + break; +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index 2958443..041946d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -3788,6 +3788,148 @@ out: + return ret; + } + ++/* * ++ * Here there are two possibilities, either 
destination is snapshot or
++ * clone. In the case of a snapshot, the nfs_ganesha export file will be copied
++ * to snapdir. If it is a clone, then a new export file will be created for
++ * the clone in the GANESHA_EXPORT_DIRECTORY, replacing occurrences of
++ * volname with clonename
++ */
++int
++glusterd_copy_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
++                               glusterd_volinfo_t *dest_vol)
++{
++    int32_t ret = -1;
++    char snap_dir[PATH_MAX] = {
++        0,
++    };
++    char src_path[PATH_MAX] = {
++        0,
++    };
++    char dest_path[PATH_MAX] = {
++        0,
++    };
++    char buffer[BUFSIZ] = {
++        0,
++    };
++    char *find_ptr = NULL;
++    char *buff_ptr = NULL;
++    char *tmp_ptr = NULL;
++    xlator_t *this = NULL;
++    glusterd_conf_t *priv = NULL;
++    struct stat stbuf = {
++        0,
++    };
++    FILE *src = NULL;
++    FILE *dest = NULL;
++
++    this = THIS;
++    GF_VALIDATE_OR_GOTO("snapshot", this, out);
++    priv = this->private;
++    GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++    GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
++    GF_VALIDATE_OR_GOTO(this->name, dest_vol, out);
++
++    if (glusterd_check_ganesha_export(src_vol) == _gf_false) {
++        gf_msg_debug(this->name, 0,
++                     "%s is not exported via "
++                     "NFS-Ganesha. Skipping copy of export conf.",
++                     src_vol->volname);
++        ret = 0;
++        goto out;
++    }
++
++    if (src_vol->is_snap_volume) {
++        GLUSTERD_GET_SNAP_DIR(snap_dir, src_vol->snapshot, priv);
++        ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf", snap_dir,
++                       src_vol->snapshot->snapname);
++    } else {
++        ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf",
++                       GANESHA_EXPORT_DIRECTORY, src_vol->volname);
++    }
++    if (ret < 0 || ret >= PATH_MAX)
++        goto out;
++
++    ret = sys_lstat(src_path, &stbuf);
++    if (ret) {
++        /*
++         * This code path is hit, only when the src_vol is being *
++         * exported via NFS-Ganesha. So if the conf file is not *
++         * available, we fail the snapshot operation. *
++         */
++        gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
++               "Stat on %s failed with %s", src_path, strerror(errno));
++        goto out;
++    }
++
++    if (dest_vol->is_snap_volume) {
++        memset(snap_dir, 0, PATH_MAX);
++        GLUSTERD_GET_SNAP_DIR(snap_dir, dest_vol->snapshot, priv);
++        ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
++                       snap_dir, dest_vol->snapshot->snapname);
++        if (ret < 0)
++            goto out;
++
++        ret = glusterd_copy_file(src_path, dest_path);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++                   "Failed to copy %s in %s", src_path, dest_path);
++            goto out;
++        }
++
++    } else {
++        ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
++                       GANESHA_EXPORT_DIRECTORY, dest_vol->volname);
++        if (ret < 0)
++            goto out;
++
++        src = fopen(src_path, "r");
++        dest = fopen(dest_path, "w");
++
++        if (!src || !dest) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
++                   "Failed to open %s", dest ? 
src_path : dest_path); ++ ret = -1; ++ goto out; ++ } ++ ++ /* * ++ * if the source volume is snapshot, the export conf file ++ * consists of orginal volname ++ */ ++ if (src_vol->is_snap_volume) ++ find_ptr = gf_strdup(src_vol->parent_volname); ++ else ++ find_ptr = gf_strdup(src_vol->volname); ++ ++ if (!find_ptr) ++ goto out; ++ ++ /* Replacing volname with clonename */ ++ while (fgets(buffer, BUFSIZ, src)) { ++ buff_ptr = buffer; ++ while ((tmp_ptr = strstr(buff_ptr, find_ptr))) { ++ while (buff_ptr < tmp_ptr) ++ fputc((int)*buff_ptr++, dest); ++ fputs(dest_vol->volname, dest); ++ buff_ptr += strlen(find_ptr); ++ } ++ fputs(buff_ptr, dest); ++ memset(buffer, 0, BUFSIZ); ++ } ++ } ++out: ++ if (src) ++ fclose(src); ++ if (dest) ++ fclose(dest); ++ if (find_ptr) ++ GF_FREE(find_ptr); ++ ++ return ret; ++} ++ + int32_t + glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol) + { +@@ -3876,6 +4018,60 @@ out: + return ret; + } + ++int ++glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol, ++ glusterd_snap_t *snap) ++{ ++ int32_t ret = -1; ++ char snap_dir[PATH_MAX] = ""; ++ char src_path[PATH_MAX] = ""; ++ char dest_path[PATH_MAX] = ""; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ struct stat stbuf = { ++ 0, ++ }; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("snapshot", this, out); ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, src_vol, out); ++ GF_VALIDATE_OR_GOTO(this->name, snap, out); ++ ++ GLUSTERD_GET_SNAP_DIR(snap_dir, snap, priv); ++ ++ ret = snprintf(src_path, sizeof(src_path), "%s/export.%s.conf", snap_dir, ++ snap->snapname); ++ if (ret < 0) ++ goto out; ++ ++ ret = sys_lstat(src_path, &stbuf); ++ if (ret) { ++ if (errno == ENOENT) { ++ ret = 0; ++ gf_msg_debug(this->name, 0, "%s not found", src_path); ++ } else ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, ++ "Stat on %s failed with %s", src_path, strerror(errno)); ++ goto out; ++ } ++ ++ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf", ++ GANESHA_EXPORT_DIRECTORY, src_vol->volname); ++ if (ret < 0) ++ goto out; ++ ++ ret = glusterd_copy_file(src_path, dest_path); ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Failed to copy %s in %s", src_path, dest_path); ++ ++out: ++ return ret; ++} ++ + /* Snapd functions */ + int + glusterd_is_snapd_enabled(glusterd_volinfo_t *volinfo) +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h +index e60be6e..41d0001 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.h ++++ b/xlators/mgmt/glusterd/src/glusterd-store.h +@@ -118,6 +118,8 @@ typedef enum glusterd_store_ver_ac_ { + #define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED "migration-skipped" + #define GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME "migration-run-time" + ++#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha" ++ + int32_t + glusterd_store_volinfo(glusterd_volinfo_t *volinfo, + glusterd_volinfo_ver_ac_t ac); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 86ef470..a0417ca 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1823,6 +1823,18 @@ glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr) + goto out; + } + ++ ret = glusterd_check_ganesha_export(volinfo); ++ if (ret) { ++ ret = ganesha_manage_export(dict, "off", op_errstr); ++ if (ret) { ++ gf_msg(THIS->name, 
GF_LOG_WARNING, 0, ++ GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL, ++ "Could not " ++ "unexport volume via NFS-Ganesha"); ++ ret = 0; ++ } ++ } ++ + if (glusterd_is_defrag_on(volinfo)) { + snprintf(msg, sizeof(msg), + "rebalance session is " +@@ -2674,6 +2686,8 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr) + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_svc_t *svc = NULL; ++ char *str = NULL; ++ gf_boolean_t option = _gf_false; + + this = THIS; + GF_ASSERT(this); +@@ -2731,6 +2745,29 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr) + } + } + ++ ret = dict_get_str(conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str); ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, ++ "Global dict not present."); ++ ret = 0; ++ ++ } else { ++ ret = gf_string2boolean(str, &option); ++ /* Check if the feature is enabled and set nfs-disable to true */ ++ if (option) { ++ gf_msg_debug(this->name, 0, "NFS-Ganesha is enabled"); ++ /* Gluster-nfs should not start when NFS-Ganesha is enabled*/ ++ ret = dict_set_str(volinfo->dict, NFS_DISABLE_MAP_KEY, "on"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, ++ "Failed to set nfs.disable for" ++ "volume %s", ++ volname); ++ goto out; ++ } ++ } ++ } ++ + ret = glusterd_start_volume(volinfo, flags, _gf_true); + if (ret) + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index d1244e4..13f423a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2597,6 +2597,13 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/upcall", + .op_version = GD_OP_VERSION_3_7_0, + }, ++ { ++ .key = "ganesha.enable", ++ .voltype = "features/ganesha", ++ .value = "off", ++ .option = "ganesha.enable", ++ .op_version = GD_OP_VERSION_3_7_0, ++ }, + /* Lease translator options */ + { + .key = "features.leases", +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 67867f8..5135181 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -61,6 +61,9 @@ + #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging" + #define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level" + ++#define GANESHA_HA_CONF CONFDIR "/ganesha-ha.conf" ++#define GANESHA_EXPORT_DIRECTORY CONFDIR "/exports" ++ + #define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 + #define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 + #define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 +@@ -1356,6 +1359,25 @@ glusterd_op_stop_volume(dict_t *dict); + int + glusterd_op_delete_volume(dict_t *dict); + int ++glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key, ++ char *value); ++int ++glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict); ++int ++glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr); ++int ++glusterd_op_set_ganesha(dict_t *dict, char **errstr); ++int ++ganesha_manage_export(dict_t *dict, char *value, char **op_errstr); ++gf_boolean_t ++glusterd_is_ganesha_cluster(); ++gf_boolean_t ++glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo); ++int ++stop_ganesha(char **op_errstr); ++int ++tear_down_cluster(gf_boolean_t run_teardown); ++int + manage_export_config(char *volname, char *value, char **op_errstr); + + int +-- +1.8.3.1 + diff --git a/SOURCES/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch 
b/SOURCES/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch new file mode 100644 index 0000000..b2d6052 --- /dev/null +++ b/SOURCES/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch @@ -0,0 +1,1897 @@ +From 267135e74d94d8a1e56a263b37ca4689020b9e53 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Mon, 16 Oct 2017 16:58:28 +0530 +Subject: [PATCH 055/124] Revert "storhaug HA: first step, remove resource + agents and setup script" + +This reverts commit c822e354e16646adf18bbc5123798663faa543b2. + +Label: DOWNSTREAM ONLY + +Change-Id: Idd50fe1a5be5a3258d560518d810f9ec4c57621a +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167104 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 1 + + extras/ganesha/Makefile.am | 2 +- + extras/ganesha/ocf/Makefile.am | 11 + + extras/ganesha/ocf/ganesha_grace | 221 +++++++ + extras/ganesha/ocf/ganesha_mon | 234 +++++++ + extras/ganesha/ocf/ganesha_nfsd | 167 +++++ + extras/ganesha/scripts/Makefile.am | 6 +- + extras/ganesha/scripts/ganesha-ha.sh | 1125 ++++++++++++++++++++++++++++++++++ + glusterfs.spec.in | 8 +- + 9 files changed, 1771 insertions(+), 4 deletions(-) + create mode 100644 extras/ganesha/ocf/Makefile.am + create mode 100644 extras/ganesha/ocf/ganesha_grace + create mode 100644 extras/ganesha/ocf/ganesha_mon + create mode 100644 extras/ganesha/ocf/ganesha_nfsd + create mode 100644 extras/ganesha/scripts/ganesha-ha.sh + +diff --git a/configure.ac b/configure.ac +index 125ae29..baa811a 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -199,6 +199,7 @@ AC_CONFIG_FILES([Makefile + extras/ganesha/Makefile + extras/ganesha/config/Makefile + extras/ganesha/scripts/Makefile ++ extras/ganesha/ocf/Makefile + extras/systemd/Makefile + extras/systemd/glusterd.service + extras/systemd/glustereventsd.service +diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am +index 542de68..9eaa401 100644 +--- a/extras/ganesha/Makefile.am ++++ b/extras/ganesha/Makefile.am +@@ -1,2 +1,2 @@ +-SUBDIRS = scripts config ++SUBDIRS = scripts config ocf + CLEANFILES = +diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am +new file mode 100644 +index 0000000..990a609 +--- /dev/null ++++ b/extras/ganesha/ocf/Makefile.am +@@ -0,0 +1,11 @@ ++EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd ++ ++# The root of the OCF resource agent hierarchy ++# Per the OCF standard, it's always "lib", ++# not "lib64" (even on 64-bit platforms). ++ocfdir = $(prefix)/lib/ocf ++ ++# The provider directory ++radir = $(ocfdir)/resource.d/heartbeat ++ ++ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd +diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace +new file mode 100644 +index 0000000..825f716 +--- /dev/null ++++ b/extras/ganesha/ocf/ganesha_grace +@@ -0,0 +1,221 @@ ++#!/bin/bash ++# ++# Copyright (c) 2014 Anand Subramanian anands@redhat.com ++# Copyright (c) 2015 Red Hat Inc. ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++# ++ ++# Initialization: ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++if [ -n "$OCF_DEBUG_LIBRARY" ]; then ++ . $OCF_DEBUG_LIBRARY ++else ++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++fi ++ ++OCF_RESKEY_grace_active_default="grace-active" ++: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} ++ ++ganesha_meta_data() { ++ cat < ++ ++ ++1.0 ++ ++ ++This Linux-specific resource agent acts as a dummy ++resource agent for nfs-ganesha. ++ ++ ++Manages the user-space nfs-ganesha NFS server ++ ++ ++ ++NFS-Ganesha grace active attribute ++NFS-Ganesha grace active attribute ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++ ++return ${OCF_SUCCESS} ++} ++ ++ganesha_grace_usage() { ++ echo "ganesha.nfsd USAGE" ++} ++ ++# Make sure meta-data and usage always succeed ++case $__OCF_ACTION in ++ meta-data) ganesha_meta_data ++ exit ${OCF_SUCCESS} ++ ;; ++ usage|help) ganesha_usage ++ exit ${OCF_SUCCESS} ++ ;; ++ *) ++ ;; ++esac ++ ++ganesha_grace_start() ++{ ++ local rc=${OCF_ERR_GENERIC} ++ local host=$(hostname -s) ++ ++ ocf_log debug "ganesha_grace_start()" ++ # give ganesha_mon RA a chance to set the crm_attr first ++ # I mislike the sleep, but it's not clear that looping ++ # with a small sleep is necessarily better ++ # start has a 40sec timeout, so a 5sec sleep here is okay ++ sleep 5 ++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) ++ if [ $? -ne 0 ]; then ++ host=$(hostname) ++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null ) ++ if [ $? -ne 0 ]; then ++ ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" ++ fi ++ fi ++ ++ # Three possibilities: ++ # 1. There is no attribute at all and attr_updater returns ++ # a zero length string. This happens when ++ # ganesha_mon::monitor hasn't run at least once to set ++ # the attribute. The assumption here is that the system ++ # is coming up. We pretend, for now, that the node is ++ # healthy, to allow the system to continue coming up. ++ # It will cure itself in a few seconds ++ # 2. There is an attribute, and it has the value "1"; this ++ # node is healthy. ++ # 3. There is an attribute, but it has no value or the value ++ # "0"; this node is not healthy. 
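++    # (editor's note, illustrative only -- not part of the original
++    # agent) a successful crm_attribute query above is assumed to print
++    # a single line ending with the attribute's value, e.g. something
++    # like:
++    #     scope=nodes name=grace-active value=1
++    # The exact layout depends on the pacemaker version, which is why
++    # the checks below match only on the trailing "value=1".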
++ ++ # case 1 ++ if [[ -z "${attr}" ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ ++ # case 2 ++ if [[ "${attr}" = *"value=1" ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ ++ # case 3 ++ return ${OCF_NOT_RUNNING} ++} ++ ++ganesha_grace_stop() ++{ ++ ++ ocf_log debug "ganesha_grace_stop()" ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_grace_notify() ++{ ++ # since this is a clone RA we should only ever see pre-start ++ # or post-stop ++ mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" ++ case "${mode}" in ++ pre-start | post-stop) ++ dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} ++ if [ $? -ne 0 ]; then ++ ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed" ++ fi ++ ;; ++ esac ++ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_grace_monitor() ++{ ++ local host=$(hostname -s) ++ ++ ocf_log debug "monitor" ++ ++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) ++ if [ $? -ne 0 ]; then ++ host=$(hostname) ++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) ++ if [ $? -ne 0 ]; then ++ ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" ++ fi ++ fi ++ ++ # if there is no attribute (yet), maybe it's because ++ # this RA started before ganesha_mon (nfs-mon) has had ++ # chance to create it. In which case we'll pretend ++ # everything is okay this time around ++ if [[ -z "${attr}" ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ ++ if [[ "${attr}" = *"value=1" ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ ++ return ${OCF_NOT_RUNNING} ++} ++ ++ganesha_grace_validate() ++{ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_grace_validate ++ ++# Translate each action into the appropriate function call ++case $__OCF_ACTION in ++start) ganesha_grace_start ++ ;; ++stop) ganesha_grace_stop ++ ;; ++status|monitor) ganesha_grace_monitor ++ ;; ++notify) ganesha_grace_notify ++ ;; ++*) ganesha_grace_usage ++ exit ${OCF_ERR_UNIMPLEMENTED} ++ ;; ++esac ++ ++rc=$? ++ ++# The resource agent may optionally log a debug message ++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" ++exit $rc +diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon +new file mode 100644 +index 0000000..2b4a9d6 +--- /dev/null ++++ b/extras/ganesha/ocf/ganesha_mon +@@ -0,0 +1,234 @@ ++#!/bin/bash ++# ++# Copyright (c) 2014 Anand Subramanian anands@redhat.com ++# Copyright (c) 2015 Red Hat Inc. ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++# ++ ++# Initialization: ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++if [ -n "${OCF_DEBUG_LIBRARY}" ]; then ++ . ${OCF_DEBUG_LIBRARY} ++else ++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++fi ++ ++# Defaults ++OCF_RESKEY_ganesha_active_default="ganesha-active" ++OCF_RESKEY_grace_active_default="grace-active" ++OCF_RESKEY_grace_delay_default="5" ++ ++: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}} ++: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} ++: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}} ++ ++ganesha_meta_data() { ++ cat < ++ ++ ++1.0 ++ ++ ++This Linux-specific resource agent acts as a dummy ++resource agent for nfs-ganesha. ++ ++ ++Manages the user-space nfs-ganesha NFS server ++ ++ ++ ++NFS-Ganesha daemon active attribute ++NFS-Ganesha daemon active attribute ++ ++ ++ ++NFS-Ganesha grace active attribute ++NFS-Ganesha grace active attribute ++ ++ ++ ++ ++NFS-Ganesha grace delay. ++When changing this, adjust the ganesha_grace RA's monitor interval to match. ++ ++NFS-Ganesha grace delay ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++ ++return ${OCF_SUCCESS} ++} ++ ++ganesha_mon_usage() { ++ echo "ganesha.nfsd USAGE" ++} ++ ++# Make sure meta-data and usage always succeed ++case ${__OCF_ACTION} in ++ meta-data) ganesha_meta_data ++ exit ${OCF_SUCCESS} ++ ;; ++ usage|help) ganesha_usage ++ exit ${OCF_SUCCESS} ++ ;; ++ *) ++ ;; ++esac ++ ++ganesha_mon_start() ++{ ++ ocf_log debug "ganesha_mon_start" ++ ganesha_mon_monitor ++ return $OCF_SUCCESS ++} ++ ++ganesha_mon_stop() ++{ ++ ocf_log debug "ganesha_mon_stop" ++ return $OCF_SUCCESS ++} ++ ++ganesha_mon_monitor() ++{ ++ local host=$(hostname -s) ++ local pid_file="/var/run/ganesha.pid" ++ local rhel6_pid_file="/var/run/ganesha.nfsd.pid" ++ local proc_pid="/proc/" ++ ++ # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid ++ # RHEL7 systemd does not. Would be nice if all distros used the ++ # same pid file. ++ if [ -e ${rhel6_pid_file} ]; then ++ pid_file=${rhel6_pid_file} ++ fi ++ if [ -e ${pid_file} ]; then ++ proc_pid="${proc_pid}$(cat ${pid_file})" ++ fi ++ ++ if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then ++ ++ attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 ++ if [ $? -ne 0 ]; then ++ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed" ++ fi ++ ++ # ganesha_grace (nfs-grace) RA follows grace-active attr ++ # w/ constraint location ++ attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 ++ if [ $? -ne 0 ]; then ++ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed" ++ fi ++ ++ # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace) ++ # track grace-active crm_attr (attr != crm_attr) ++ # we can't just use the attr as there's no way to query ++ # its value in RHEL6 pacemaker ++ ++ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null ++ if [ $? -ne 0 ]; then ++ host=$(hostname) ++ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null ++ if [ $? 
-ne 0 ]; then ++ ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed" ++ fi ++ fi ++ ++ return ${OCF_SUCCESS} ++ fi ++ ++ # VIP fail-over is triggered by clearing the ++ # ganesha-active node attribute on this node. ++ # ++ # Meanwhile the ganesha_grace notify() runs when its ++ # nfs-grace resource is disabled on a node; which ++ # is triggered by clearing the grace-active attribute ++ # on this node. ++ # ++ # We need to allow time for it to run and put ++ # the remaining ganesha.nfsds into grace before ++ # initiating the VIP fail-over. ++ ++ attrd_updater -D -n ${OCF_RESKEY_grace_active} ++ if [ $? -ne 0 ]; then ++ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed" ++ fi ++ ++ host=$(hostname -s) ++ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null ++ if [ $? -ne 0 ]; then ++ host=$(hostname) ++ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null ++ if [ $? -ne 0 ]; then ++ ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed" ++ fi ++ fi ++ ++ sleep ${OCF_RESKEY_grace_delay} ++ ++ attrd_updater -D -n ${OCF_RESKEY_ganesha_active} ++ if [ $? -ne 0 ]; then ++ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed" ++ fi ++ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_mon_validate() ++{ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_mon_validate ++ ++# Translate each action into the appropriate function call ++case ${__OCF_ACTION} in ++start) ganesha_mon_start ++ ;; ++stop) ganesha_mon_stop ++ ;; ++status|monitor) ganesha_mon_monitor ++ ;; ++*) ganesha_mon_usage ++ exit ${OCF_ERR_UNIMPLEMENTED} ++ ;; ++esac ++ ++rc=$? ++ ++# The resource agent may optionally log a debug message ++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" ++exit $rc +diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd +new file mode 100644 +index 0000000..93fc8be +--- /dev/null ++++ b/extras/ganesha/ocf/ganesha_nfsd +@@ -0,0 +1,167 @@ ++#!/bin/bash ++# ++# Copyright (c) 2014 Anand Subramanian anands@redhat.com ++# Copyright (c) 2015 Red Hat Inc. ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++# ++ ++# Initialization: ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++if [ -n "${OCF_DEBUG_LIBRARY}" ]; then ++ . 
${OCF_DEBUG_LIBRARY} ++else ++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++fi ++ ++OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage" ++: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} ++ ++ganesha_meta_data() { ++ cat < ++ ++ ++1.0 ++ ++ ++This Linux-specific resource agent acts as a dummy ++resource agent for nfs-ganesha. ++ ++ ++Manages the user-space nfs-ganesha NFS server ++ ++ ++ ++HA State Volume Mount Point ++HA_State Volume Mount Point ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++ ++return ${OCF_SUCCESS} ++} ++ ++ganesha_nfsd_usage() { ++ echo "ganesha.nfsd USAGE" ++} ++ ++# Make sure meta-data and usage always succeed ++case $__OCF_ACTION in ++ meta-data) ganesha_meta_data ++ exit ${OCF_SUCCESS} ++ ;; ++ usage|help) ganesha_usage ++ exit ${OCF_SUCCESS} ++ ;; ++ *) ++ ;; ++esac ++ ++ganesha_nfsd_start() ++{ ++ local long_host=$(hostname) ++ ++ if [[ -d /var/lib/nfs ]]; then ++ mv /var/lib/nfs /var/lib/nfs.backup ++ if [ $? -ne 0 ]; then ++ ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed" ++ fi ++ ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs ++ if [ $? -ne 0 ]; then ++ ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed" ++ fi ++ fi ++ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_nfsd_stop() ++{ ++ ++ if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then ++ rm -f /var/lib/nfs ++ if [ $? -ne 0 ]; then ++ ocf_log notice "rm -f /var/lib/nfs failed" ++ fi ++ mv /var/lib/nfs.backup /var/lib/nfs ++ if [ $? -ne 0 ]; then ++ ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed" ++ fi ++ fi ++ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_nfsd_monitor() ++{ ++ # pacemaker checks to see if RA is already running before starting it. ++ # if we return success, then it's presumed it's already running and ++ # doesn't need to be started, i.e. invoke the start action. ++ # return something other than success to make pacemaker invoke the ++ # start action ++ if [[ -L /var/lib/nfs ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ return ${OCF_NOT_RUNNING} ++} ++ ++ganesha_nfsd_validate() ++{ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_nfsd_validate ++ ++# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION" ++ ++# Translate each action into the appropriate function call ++case $__OCF_ACTION in ++start) ganesha_nfsd_start ++ ;; ++stop) ganesha_nfsd_stop ++ ;; ++status|monitor) ganesha_nfsd_monitor ++ ;; ++*) ganesha_nfsd_usage ++ exit ${OCF_ERR_UNIMPLEMENTED} ++ ;; ++esac ++ ++rc=$? 
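++
++# (editor's note, not part of the original agent) ganesha_nfsd is not
++# normally invoked by hand; ganesha-ha.sh below clones it across the
++# cluster during setup, see setup_create_resources():
++#
++#     pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd \
++#         ha_vol_mnt=${HA_VOL_MNT} --clone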
++ ++# The resource agent may optionally log a debug message ++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" ++exit $rc +diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am +index 00a2c45..7e345fd 100644 +--- a/extras/ganesha/scripts/Makefile.am ++++ b/extras/ganesha/scripts/Makefile.am +@@ -1,4 +1,6 @@ +-EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh ++EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \ ++ ganesha-ha.sh + + scriptsdir = $(libexecdir)/ganesha +-scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py ++scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \ ++ ganesha-ha.sh +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +new file mode 100644 +index 0000000..6b011be +--- /dev/null ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -0,0 +1,1125 @@ ++#!/bin/bash ++ ++# Copyright 2015-2016 Red Hat Inc. All Rights Reserved ++# ++# Pacemaker+Corosync High Availability for NFS-Ganesha ++# ++# setup, teardown, add, delete, refresh-config, and status ++# ++# Each participating node in the cluster is assigned a virtual IP (VIP) ++# which fails over to another node when its associated ganesha.nfsd dies ++# for any reason. After the VIP is moved to another node all the ++# ganesha.nfsds are send a signal using DBUS to put them into NFS GRACE. ++# ++# There are six resource agent types used: ganesha_mon, ganesha_grace, ++# ganesha_nfsd, IPaddr, and Dummy. ganesha_mon is used to monitor the ++# ganesha.nfsd. ganesha_grace is used to send the DBUS signal to put ++# the remaining ganesha.nfsds into grace. ganesha_nfsd is used to start ++# and stop the ganesha.nfsd during setup and teardown. IPaddr manages ++# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to ++# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to ++# the new host. ++ ++HA_NUM_SERVERS=0 ++HA_SERVERS="" ++HA_VOL_NAME="gluster_shared_storage" ++HA_VOL_MNT="/var/run/gluster/shared_storage" ++HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" ++SERVICE_MAN="DISTRO_NOT_FOUND" ++ ++RHEL6_PCS_CNAME_OPTION="--name" ++SECRET_PEM="/var/lib/glusterd/nfs/secret.pem" ++ ++# UNBLOCK RA uses shared_storage which may become unavailable ++# during any of the nodes reboot. Hence increase timeout value. ++PORTBLOCK_UNBLOCK_TIMEOUT="60s" ++ ++# Try loading the config from any of the distro ++# specific configuration locations ++if [ -f /etc/sysconfig/ganesha ] ++ then ++ . /etc/sysconfig/ganesha ++fi ++if [ -f /etc/conf.d/ganesha ] ++ then ++ . /etc/conf.d/ganesha ++fi ++if [ -f /etc/default/ganesha ] ++ then ++ . 
/etc/default/ganesha ++fi ++ ++GANESHA_CONF= ++ ++function find_rhel7_conf ++{ ++ while [[ $# > 0 ]] ++ do ++ key="$1" ++ case $key in ++ -f) ++ CONFFILE="$2" ++ break; ++ ;; ++ *) ++ ;; ++ esac ++ shift ++ done ++} ++ ++if [ -z $CONFFILE ] ++ then ++ find_rhel7_conf $OPTIONS ++ ++fi ++ ++GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} ++ ++usage() { ++ ++ echo "Usage : add|delete|refresh-config|status" ++ echo "Add-node : ganesha-ha.sh --add \ ++ " ++ echo "Delete-node: ganesha-ha.sh --delete \ ++" ++ echo "Refresh-config : ganesha-ha.sh --refresh-config \ ++" ++ echo "Status : ganesha-ha.sh --status " ++} ++ ++determine_service_manager () { ++ ++ if [ -e "/usr/bin/systemctl" ]; ++ then ++ SERVICE_MAN="/usr/bin/systemctl" ++ elif [ -e "/sbin/invoke-rc.d" ]; ++ then ++ SERVICE_MAN="/sbin/invoke-rc.d" ++ elif [ -e "/sbin/service" ]; ++ then ++ SERVICE_MAN="/sbin/service" ++ fi ++ if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ] ++ then ++ echo "Service manager not recognized, exiting" ++ exit 1 ++ fi ++} ++ ++manage_service () ++{ ++ local action=${1} ++ local new_node=${2} ++ local option= ++ ++ if [ "$action" == "start" ]; then ++ option="yes" ++ else ++ option="no" ++ fi ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${new_node} "/usr/libexec/ganesha/ganesha-ha.sh --setup-ganesha-conf-files $HA_CONFDIR $option" ++ ++ if [ "$SERVICE_MAN" == "/usr/bin/systemctl" ] ++ then ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${new_node} "$SERVICE_MAN ${action} nfs-ganesha" ++ else ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}" ++ fi ++} ++ ++ ++check_cluster_exists() ++{ ++ local name=${1} ++ local cluster_name="" ++ ++ if [ -e /var/run/corosync.pid ]; then ++ cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3) ++ if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then ++ logger "$name already exists, exiting" ++ exit 0 ++ fi ++ fi ++} ++ ++ ++determine_servers() ++{ ++ local cmd=${1} ++ local num_servers=0 ++ local tmp_ifs=${IFS} ++ local ha_servers="" ++ ++ if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then ++ ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') ++ IFS=$' ' ++ for server in ${ha_servers} ; do ++ num_servers=$(expr ${num_servers} + 1) ++ done ++ IFS=${tmp_ifs} ++ HA_NUM_SERVERS=${num_servers} ++ HA_SERVERS="${ha_servers}" ++ else ++ IFS=$',' ++ for server in ${HA_CLUSTER_NODES} ; do ++ num_servers=$(expr ${num_servers} + 1) ++ done ++ IFS=${tmp_ifs} ++ HA_NUM_SERVERS=${num_servers} ++ HA_SERVERS="${HA_CLUSTER_NODES//,/ }" ++ fi ++} ++ ++ ++setup_cluster() ++{ ++ local name=${1} ++ local num_servers=${2} ++ local servers=${3} ++ local unclean="" ++ local quorum_policy="stop" ++ ++ logger "setting up cluster ${name} with the following ${servers}" ++ ++ pcs cluster auth ${servers} ++ # pcs cluster setup --name ${name} ${servers} ++ pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers} ++ if [ $? -ne 0 ]; then ++ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" ++ exit 1; ++ fi ++ pcs cluster start --all ++ if [ $? -ne 0 ]; then ++ logger "pcs cluster start failed" ++ exit 1; ++ fi ++ ++ sleep 1 ++ # wait for the cluster to elect a DC before querying or writing ++ # to the CIB. BZ 1334092 ++ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 ++ while [ $? 
-ne 0 ]; do ++ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 ++ done ++ ++ unclean=$(pcs status | grep -u "UNCLEAN") ++ while [[ "${unclean}X" = "UNCLEANX" ]]; do ++ sleep 1 ++ unclean=$(pcs status | grep -u "UNCLEAN") ++ done ++ sleep 1 ++ ++ if [ ${num_servers} -lt 3 ]; then ++ quorum_policy="ignore" ++ fi ++ pcs property set no-quorum-policy=${quorum_policy} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" ++ fi ++ ++ pcs property set stonith-enabled=false ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs property set stonith-enabled=false failed" ++ fi ++} ++ ++ ++setup_finalize_ha() ++{ ++ local cibfile=${1} ++ local stopped="" ++ ++ stopped=$(pcs status | grep -u "Stopped") ++ while [[ "${stopped}X" = "StoppedX" ]]; do ++ sleep 1 ++ stopped=$(pcs status | grep -u "Stopped") ++ done ++} ++ ++ ++refresh_config () ++{ ++ local short_host=$(hostname -s) ++ local VOL=${1} ++ local HA_CONFDIR=${2} ++ local short_host=$(hostname -s) ++ ++ local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\ ++ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') ++ ++ ++ if [ -e ${SECRET_PEM} ]; then ++ while [[ ${3} ]]; do ++ current_host=`echo ${3} | cut -d "." -f 1` ++ if [ ${short_host} != ${current_host} ]; then ++ output=$(ssh -oPasswordAuthentication=no \ ++-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ ++"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ ++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ ++string:$HA_CONFDIR/exports/export.$VOL.conf \ ++string:\"EXPORT(Export_Id=$export_id)\" 2>&1") ++ ret=$? ++ logger <<< "${output}" ++ if [ ${ret} -ne 0 ]; then ++ echo "Error: refresh-config failed on ${current_host}." ++ exit 1 ++ else ++ echo "Refresh-config completed on ${current_host}." ++ fi ++ ++ fi ++ shift ++ done ++ else ++ echo "Error: refresh-config failed. Passwordless ssh is not enabled." ++ exit 1 ++ fi ++ ++ # Run the same command on the localhost, ++ output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ ++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ ++string:$HA_CONFDIR/exports/export.$VOL.conf \ ++string:"EXPORT(Export_Id=$export_id)" 2>&1) ++ ret=$? ++ logger <<< "${output}" ++ if [ ${ret} -ne 0 ] ; then ++ echo "Error: refresh-config failed on localhost." ++ exit 1 ++ else ++ echo "Success: refresh-config completed." ++ fi ++} ++ ++ ++teardown_cluster() ++{ ++ local name=${1} ++ ++ for server in ${HA_SERVERS} ; do ++ if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then ++ logger "info: ${server} is not in config, removing" ++ ++ pcs cluster stop ${server} --force ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster stop ${server} failed" ++ fi ++ ++ pcs cluster node remove ${server} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster node remove ${server} failed" ++ fi ++ fi ++ done ++ ++ # BZ 1193433 - pcs doesn't reload cluster.conf after modification ++ # after teardown completes, a subsequent setup will appear to have ++ # 'remembered' the deleted node. You can work around this by ++ # issuing another `pcs cluster node remove $node`, ++ # `crm_node -f -R $server`, or ++ # `cibadmin --delete --xml-text '' ++ ++ pcs cluster stop --all ++ if [ $? -ne 0 ]; then ++ logger "warning pcs cluster stop --all failed" ++ fi ++ ++ pcs cluster destroy ++ if [ $? 
-ne 0 ]; then ++ logger "error pcs cluster destroy failed" ++ exit 1 ++ fi ++} ++ ++ ++cleanup_ganesha_config () ++{ ++ rm -f /etc/corosync/corosync.conf ++ rm -rf /etc/cluster/cluster.conf* ++ rm -rf /var/lib/pacemaker/cib/* ++} ++ ++do_create_virt_ip_constraints() ++{ ++ local cibfile=${1}; shift ++ local primary=${1}; shift ++ local weight="1000" ++ ++ # first a constraint location rule that says the VIP must be where ++ # there's a ganesha.nfsd running ++ pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed" ++ fi ++ ++ # then a set of constraint location prefers to set the prefered order ++ # for where a VIP should move ++ while [[ ${1} ]]; do ++ pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed" ++ fi ++ weight=$(expr ${weight} + 1000) ++ shift ++ done ++ # and finally set the highest preference for the VIP to its home node ++ # default weight when created is/was 100. ++ # on Fedora setting appears to be additive, so to get the desired ++ # value we adjust the weight ++ # weight=$(expr ${weight} - 100) ++ pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed" ++ fi ++} ++ ++ ++wrap_create_virt_ip_constraints() ++{ ++ local cibfile=${1}; shift ++ local primary=${1}; shift ++ local head="" ++ local tail="" ++ ++ # build a list of peers, e.g. for a four node cluster, for node1, ++ # the result is "node2 node3 node4"; for node2, "node3 node4 node1" ++ # and so on. ++ while [[ ${1} ]]; do ++ if [ "${1}" = "${primary}" ]; then ++ shift ++ while [[ ${1} ]]; do ++ tail=${tail}" "${1} ++ shift ++ done ++ else ++ head=${head}" "${1} ++ fi ++ shift ++ done ++ do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head} ++} ++ ++ ++create_virt_ip_constraints() ++{ ++ local cibfile=${1}; shift ++ ++ while [[ ${1} ]]; do ++ wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS} ++ shift ++ done ++} ++ ++ ++setup_create_resources() ++{ ++ local cibfile=$(mktemp -u) ++ ++ # fixup /var/lib/nfs ++ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone" ++ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed" ++ fi ++ ++ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed" ++ fi ++ ++ # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace ++ # start method. Allow time for ganesha_mon to start and set the ++ # ganesha-active crm_attribute ++ sleep 5 ++ ++ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed" ++ fi ++ ++ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 ++ if [ $? 
-ne 0 ]; then ++ logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1" ++ fi ++ ++ pcs cluster cib ${cibfile} ++ ++ while [[ ${1} ]]; do ++ ++ # this is variable indirection ++ # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"' ++ # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...) ++ # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1') ++ # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"') ++ # after the `eval ${clean_nvs}` there is a variable VIP_host_1 ++ # with the value '10_7_6_5', and the following \$$ magic to ++ # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us ++ # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s ++ # to give us ipaddr="10.7.6.5". whew! ++ name="VIP_${1}" ++ clean_name=${name//[-.]/_} ++ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) ++ clean_nvs=${nvs//[-.]/_} ++ eval ${clean_nvs} ++ eval tmp_ipaddr=\$${clean_name} ++ ipaddr=${tmp_ipaddr//_/.} ++ ++ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ ++ portno=2049 action=block ip=${ipaddr} --group ${1}-group ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-nfs_block failed" ++ fi ++ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ ++ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ ++ cidr_netmask=32 op monitor interval=15s failed" ++ fi ++ ++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" ++ fi ++ ++ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ ++ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ ++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ ++ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ ++ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-nfs_unblock failed" ++ fi ++ ++ ++ shift ++ done ++ ++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS} ++ ++ pcs cluster cib-push ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning pcs cluster cib-push ${cibfile} failed" ++ fi ++ rm -f ${cibfile} ++} ++ ++ ++teardown_resources() ++{ ++ # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2) ++ ++ # restore /var/lib/nfs ++ logger "notice: pcs resource delete nfs_setup-clone" ++ pcs resource delete nfs_setup-clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource delete nfs_setup-clone failed" ++ fi ++ ++ # delete -clone resource agents ++ # in particular delete the ganesha monitor so we don't try to ++ # trigger anything when we shut down ganesha next. ++ pcs resource delete nfs-mon-clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource delete nfs-mon-clone failed" ++ fi ++ ++ pcs resource delete nfs-grace-clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource delete nfs-grace-clone failed" ++ fi ++ ++ while [[ ${1} ]]; do ++ pcs resource delete ${1}-group ++ if [ $? 
-ne 0 ]; then ++ logger "warning: pcs resource delete ${1}-group failed" ++ fi ++ shift ++ done ++ ++} ++ ++ ++recreate_resources() ++{ ++ local cibfile=${1}; shift ++ ++ while [[ ${1} ]]; do ++ # this is variable indirection ++ # see the comment on the same a few lines up ++ name="VIP_${1}" ++ clean_name=${name//[-.]/_} ++ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) ++ clean_nvs=${nvs//[-.]/_} ++ eval ${clean_nvs} ++ eval tmp_ipaddr=\$${clean_name} ++ ipaddr=${tmp_ipaddr//_/.} ++ ++ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ ++ portno=2049 action=block ip=${ipaddr} --group ${1}-group ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-nfs_block failed" ++ fi ++ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ ++ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ ++ cidr_netmask=32 op monitor interval=15s failed" ++ fi ++ ++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" ++ fi ++ ++ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ ++ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ ++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ ++ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ ++ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-nfs_unblock failed" ++ fi ++ ++ shift ++ done ++} ++ ++ ++addnode_recreate_resources() ++{ ++ local cibfile=${1}; shift ++ local add_node=${1}; shift ++ local add_vip=${1}; shift ++ ++ recreate_resources ${cibfile} ${HA_SERVERS} ++ ++ pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \ ++ protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${add_node}-nfs_block failed" ++ fi ++ pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ ++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \ ++ --after ${add_node}-nfs_block ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ ++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" ++ fi ++ ++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed" ++ fi ++ pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \ ++ protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \ ++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \ ++ ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \ ++ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \ ++ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} ++ if [ $? 
-ne 0 ]; then ++ logger "warning pcs resource create ${add_node}-nfs_unblock failed" ++ fi ++} ++ ++ ++clear_resources() ++{ ++ local cibfile=${1}; shift ++ ++ while [[ ${1} ]]; do ++ pcs -f ${cibfile} resource delete ${1}-group ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs -f ${cibfile} resource delete ${1}-group" ++ fi ++ ++ shift ++ done ++} ++ ++ ++addnode_create_resources() ++{ ++ local add_node=${1}; shift ++ local add_vip=${1}; shift ++ local cibfile=$(mktemp -u) ++ ++ # start HA on the new node ++ pcs cluster start ${add_node} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster start ${add_node} failed" ++ fi ++ ++ pcs cluster cib ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster cib ${cibfile} failed" ++ fi ++ ++ # delete all the -cluster_ip-1 resources, clearing ++ # their constraints, then create them again so we can ++ # recompute their constraints ++ clear_resources ${cibfile} ${HA_SERVERS} ++ addnode_recreate_resources ${cibfile} ${add_node} ${add_vip} ++ ++ HA_SERVERS="${HA_SERVERS} ${add_node}" ++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS} ++ ++ pcs cluster cib-push ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster cib-push ${cibfile} failed" ++ fi ++ rm -f ${cibfile} ++} ++ ++ ++deletenode_delete_resources() ++{ ++ local node=${1}; shift ++ local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//) ++ local cibfile=$(mktemp -u) ++ ++ pcs cluster cib ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster cib ${cibfile} failed" ++ fi ++ ++ # delete all the -cluster_ip-1 and -trigger_ip-1 resources, ++ # clearing their constraints, then create them again so we can ++ # recompute their constraints ++ clear_resources ${cibfile} ${HA_SERVERS} ++ recreate_resources ${cibfile} ${ha_servers} ++ HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /") ++ ++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS} ++ ++ pcs cluster cib-push ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster cib-push ${cibfile} failed" ++ fi ++ rm -f ${cibfile} ++ ++} ++ ++ ++deletenode_update_haconfig() ++{ ++ local name="VIP_${1}" ++ local clean_name=${name//[-.]/_} ++ ++ ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/") ++ sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf ++} ++ ++ ++setup_state_volume() ++{ ++ local mnt=${HA_VOL_MNT} ++ local longname="" ++ local shortname="" ++ local dname="" ++ local dirname="" ++ ++ longname=$(hostname) ++ dname=${longname#$(hostname -s)} ++ ++ while [[ ${1} ]]; do ++ ++ if [[ ${1} == *${dname} ]]; then ++ dirname=${1} ++ else ++ dirname=${1}${dname} ++ fi ++ ++ if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then ++ mkdir ${mnt}/nfs-ganesha/tickle_dir ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname} ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd ++ fi ++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then ++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ++ fi ++ if [ ! 
-d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ++ fi ++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then ++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ++ fi ++ for server in ${HA_SERVERS} ; do ++ if [ ${server} != ${dirname} ]; then ++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} ++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} ++ fi ++ done ++ shift ++ done ++ ++} ++ ++ ++addnode_state_volume() ++{ ++ local newnode=${1}; shift ++ local mnt=${HA_VOL_MNT} ++ local longname="" ++ local dname="" ++ local dirname="" ++ ++ longname=$(hostname) ++ dname=${longname#$(hostname -s)} ++ ++ if [[ ${newnode} == *${dname} ]]; then ++ dirname=${newnode} ++ else ++ dirname=${newnode}${dname} ++ fi ++ ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname} ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd ++ fi ++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then ++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ++ fi ++ if [ ! 
-e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then ++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ++ fi ++ ++ for server in ${HA_SERVERS} ; do ++ ++ if [[ ${server} != ${dirname} ]]; then ++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} ++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} ++ ++ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} ++ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} ++ fi ++ done ++ ++} ++ ++ ++delnode_state_volume() ++{ ++ local delnode=${1}; shift ++ local mnt=${HA_VOL_MNT} ++ local longname="" ++ local dname="" ++ local dirname="" ++ ++ longname=$(hostname) ++ dname=${longname#$(hostname -s)} ++ ++ if [[ ${delnode} == *${dname} ]]; then ++ dirname=${delnode} ++ else ++ dirname=${delnode}${dname} ++ fi ++ ++ rm -rf ${mnt}/nfs-ganesha/${dirname} ++ ++ for server in ${HA_SERVERS} ; do ++ if [[ "${server}" != "${dirname}" ]]; then ++ rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} ++ rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} ++ fi ++ done ++} ++ ++ ++status() ++{ ++ local scratch=$(mktemp) ++ local regex_str="^${1}-cluster_ip-1" ++ local healthy=0 ++ local index=1 ++ local nodes ++ ++ # change tabs to spaces, strip leading spaces ++ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch} ++ ++ nodes[0]=${1}; shift ++ ++ # make a regex of the configured nodes ++ # and initialize the nodes array for later ++ while [[ ${1} ]]; do ++ ++ regex_str="${regex_str}|^${1}-cluster_ip-1" ++ nodes[${index}]=${1} ++ ((index++)) ++ shift ++ done ++ ++ # print the nodes that are expected to be online ++ grep -E "^Online:" ${scratch} ++ ++ echo ++ ++ # print the VIPs and which node they are on ++ grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4 ++ ++ echo ++ ++ # check if the VIP and port block/unblock RAs are on the expected nodes ++ for n in ${nodes[*]}; do ++ ++ grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ result=$? ++ ((healthy+=${result})) ++ grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ result=$? ++ ((healthy+=${result})) ++ grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ result=$? ++ ((healthy+=${result})) ++ done ++ ++ grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} ++ result=$? ++ ++ if [ ${result} -eq 0 ]; then ++ echo "Cluster HA Status: BAD" ++ elif [ ${healthy} -eq 0 ]; then ++ echo "Cluster HA Status: HEALTHY" ++ else ++ echo "Cluster HA Status: FAILOVER" ++ fi ++ ++ rm -f ${scratch} ++} ++ ++create_ganesha_conf_file() ++{ ++ if [ $1 == "yes" ]; ++ then ++ if [ -e $GANESHA_CONF ]; ++ then ++ rm -rf $GANESHA_CONF ++ fi ++ # The symlink /etc/ganesha/ganesha.conf needs to be ++ # created from the ganesha conf file kept on the ++ # shared storage. Every node holds only this ++ # link while the actual file is stored on shared ++ # storage, so that editing ganesha.conf is easy ++ # and stays consistent across all nodes. 
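++ # (Illustrative: the ln -s below creates ++ # /etc/ganesha/ganesha.conf -> $HA_CONFDIR/ganesha.conf, ++ # with $HA_CONFDIR expected to live on the shared storage volume.) 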
++ ++ ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF ++ else ++ # Restoring previous file ++ rm -rf $GANESHA_CONF ++ cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF ++ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF ++ fi ++} ++ ++set_quorum_policy() ++{ ++ local quorum_policy="stop" ++ local num_servers=${1} ++ ++ if [ ${num_servers} -lt 3 ]; then ++ quorum_policy="ignore" ++ fi ++ pcs property set no-quorum-policy=${quorum_policy} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" ++ fi ++} ++ ++main() ++{ ++ ++ local cmd=${1}; shift ++ if [[ ${cmd} == *help ]]; then ++ usage ++ exit 0 ++ fi ++ HA_CONFDIR=${1%/}; shift ++ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf ++ local node="" ++ local vip="" ++ ++ # ignore any comment lines ++ cfgline=$(grep ^HA_NAME= ${ha_conf}) ++ eval $(echo ${cfgline} | grep -F HA_NAME=) ++ cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) ++ eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) ++ ++ case "${cmd}" in ++ ++ setup | --setup) ++ logger "setting up ${HA_NAME}" ++ ++ check_cluster_exists ${HA_NAME} ++ ++ determine_servers "setup" ++ ++ if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then ++ ++ setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" ++ ++ setup_create_resources ${HA_SERVERS} ++ ++ setup_finalize_ha ++ ++ setup_state_volume ${HA_SERVERS} ++ ++ else ++ ++ logger "insufficient servers for HA, aborting" ++ fi ++ ;; ++ ++ teardown | --teardown) ++ logger "tearing down ${HA_NAME}" ++ ++ determine_servers "teardown" ++ ++ teardown_resources ${HA_SERVERS} ++ ++ teardown_cluster ${HA_NAME} ++ ++ cleanup_ganesha_config ${HA_CONFDIR} ++ ;; ++ ++ cleanup | --cleanup) ++ cleanup_ganesha_config ${HA_CONFDIR} ++ ;; ++ ++ add | --add) ++ node=${1}; shift ++ vip=${1}; shift ++ ++ logger "adding ${node} with ${vip} to ${HA_NAME}" ++ ++ determine_service_manager ++ ++ manage_service "start" ${node} ++ ++ determine_servers "add" ++ ++ pcs cluster node add ${node} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster node add ${node} failed" ++ fi ++ ++ addnode_create_resources ${node} ${vip} ++ # Subsequent add-node recreates resources for all the nodes ++ # that already exist in the cluster. The nodes are picked up ++ # from the entries in the ganesha-ha.conf file. Add the ++ # newly added node to the file so that the resources specific ++ # to this node are correctly recreated in the future. ++ clean_node=${node//[-.]/_} ++ echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf ++ ++ NEW_NODES="$HA_CLUSTER_NODES,${node}" ++ ++ sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \ ++$HA_CONFDIR/ganesha-ha.conf ++ ++ addnode_state_volume ${node} ++ ++ # addnode_create_resources() already appended ${node} to ++ # HA_SERVERS, so only need to increment HA_NUM_SERVERS ++ # and set quorum policy ++ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1) ++ set_quorum_policy ${HA_NUM_SERVERS} ++ ;; ++ ++ delete | --delete) ++ node=${1}; shift ++ ++ logger "deleting ${node} from ${HA_NAME}" ++ ++ determine_servers "delete" ++ ++ deletenode_delete_resources ${node} ++ ++ pcs cluster node remove ${node} ++ if [ $? 
-ne 0 ]; then ++ logger "warning: pcs cluster node remove ${node} failed" ++ fi ++ ++ deletenode_update_haconfig ${node} ++ ++ delnode_state_volume ${node} ++ ++ determine_service_manager ++ ++ manage_service "stop" ${node} ++ ++ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1) ++ set_quorum_policy ${HA_NUM_SERVERS} ++ ;; ++ ++ status | --status) ++ determine_servers "status" ++ ++ status ${HA_SERVERS} ++ ;; ++ ++ refresh-config | --refresh-config) ++ VOL=${1} ++ ++ determine_servers "refresh-config" ++ ++ refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS} ++ ;; ++ ++ setup-ganesha-conf-files | --setup-ganesha-conf-files) ++ ++ create_ganesha_conf_file ${1} ++ ;; ++ ++ *) ++ # setup and teardown are not intended to be used by a ++ # casual user ++ usage ++ logger "Usage: ganesha-ha.sh add|delete|status" ++ ;; ++ ++ esac ++} ++ ++main $* +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index dd7438c..d748ebc 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -460,7 +460,8 @@ Summary: NFS-Ganesha configuration + Group: Applications/File + + Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: nfs-ganesha-gluster, pcs, dbus ++Requires: nfs-ganesha-gluster >= 2.4.1 ++Requires: pcs, dbus + %if ( 0%{?rhel} && 0%{?rhel} == 6 ) + Requires: cman, pacemaker, corosync + %endif +@@ -1138,6 +1139,7 @@ exit 0 + #exclude ganesha related files + %exclude %{_sysconfdir}/ganesha/* + %exclude %{_libexecdir}/ganesha/* ++%exclude %{_prefix}/lib/ocf/resource.d/heartbeat/* + %endif + + %files api +@@ -1306,6 +1308,7 @@ exit 0 + %files ganesha + %{_sysconfdir}/ganesha/* + %{_libexecdir}/ganesha/* ++%{_prefix}/lib/ocf/resource.d/heartbeat/* + %endif + + %if ( 0%{!?_without_ocf:1} ) +@@ -1904,6 +1907,9 @@ fi + %endif + + %changelog ++* Sat Apr 6 2019 Jiffin Tony Thottan ++- Adding ganesha ha resources back in gluster repository ++ + * Fri Apr 5 2019 Jiffin Tony Thottan + - Adding ganesha bits back in gluster repository + +-- +1.8.3.1 + diff --git a/SOURCES/0056-common-ha-fixes-for-Debian-based-systems.patch b/SOURCES/0056-common-ha-fixes-for-Debian-based-systems.patch new file mode 100644 index 0000000..fef23b1 --- /dev/null +++ b/SOURCES/0056-common-ha-fixes-for-Debian-based-systems.patch @@ -0,0 +1,229 @@ +From 2c1a83920b959a1ec170243d1eec71b1e2c074b0 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Fri, 7 Apr 2017 09:09:29 -0400 +Subject: [PATCH 056/124] common-ha: fixes for Debian-based systems + +1) Debian-based systems don't have /usr/libexec/... and there is +a hard-coded invocation of /usr/libexec/ganesha/ganesha-ha.sh within +ganesha-ha.sh itself. +Fix: save $0 and use it instead for further invocations of self. + +2) default shell is /bin/dash (not /bin/bash). Various runner_run() +invocations for ganesha used what amounts to + exec("sh /usr/$libexec/ganesha/ganesha-ha.sh ...); +which executes the script using the default shell, but there are +some bash-specific idioms that don't work if the shell is dash. +Fix: change to exec("/usr/$libexec/ganesha/ganesha-ha.sh ...); so that +the shebang forces the use of /bin/bash + +3) Fedora and RHEL7 have merged /bin/ and /usr/bin, /bin is a symlink +to /usr/bin. Debian-based systems are not merged, and systemd systems +have /bin/systemctl. The logic to find .../bin/systemctl is backwards. +If the logic looks for /usr/bin/systemctl it will not find it on +Debian-based systems; if it looks for /bin/systemctl it will find it +on Fedora and RHEL by virtue of the symlink. (RHEL6 and others will +find their respective init regardless.) 
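+(Illustrative sketch of the resulting probe order, using the same candidate +paths as the lookup table below; SERVICE_MAN is the variable the script +already sets: for mgr in /bin/systemctl /sbin/invoke-rc.d /sbin/service; do +test -x ${mgr} && { SERVICE_MAN=${mgr}; break; }; done) 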
+Fix: change the logic to look for /bin/systemctl instead. + +4) The logic for deciding to run systemctl (or not) is a bit silly. +Fix: simply invoke the found method via the function pointer in the +table. + +Label: DOWNSTREAM ONLY + +Change-Id: I33681b296a73aebb078bda6ac0d3a1d3b9770a21 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://review.gluster.org/17013 +Smoke: Gluster Build System +Reviewed-by: Niels de Vos +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: jiffin tony Thottan +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167141 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 21 +++++++++--------- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 32 +++++++++++----------------- + 2 files changed, 23 insertions(+), 30 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 6b011be..4b93f95 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -20,6 +20,7 @@ + # ensure that the NFS GRACE DBUS signal is sent after the VIP moves to + # the new host. + ++GANESHA_HA_SH=$(realpath $0) + HA_NUM_SERVERS=0 + HA_SERVERS="" + HA_VOL_NAME="gluster_shared_storage" +@@ -68,9 +69,9 @@ function find_rhel7_conf + done + } + +-if [ -z $CONFFILE ] ++if [ -z ${CONFFILE} ] + then +- find_rhel7_conf $OPTIONS ++ find_rhel7_conf ${OPTIONS} + + fi + +@@ -90,9 +91,9 @@ usage() { + + determine_service_manager () { + +- if [ -e "/usr/bin/systemctl" ]; ++ if [ -e "/bin/systemctl" ]; + then +- SERVICE_MAN="/usr/bin/systemctl" ++ SERVICE_MAN="/bin/systemctl" + elif [ -e "/sbin/invoke-rc.d" ]; + then + SERVICE_MAN="/sbin/invoke-rc.d" +@@ -100,7 +101,7 @@ determine_service_manager () { + then + SERVICE_MAN="/sbin/service" + fi +- if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ] ++ if [ "${SERVICE_MAN}" == "DISTRO_NOT_FOUND" ] + then + echo "Service manager not recognized, exiting" + exit 1 +@@ -113,21 +114,21 @@ manage_service () + local new_node=${2} + local option= + +- if [ "$action" == "start" ]; then ++ if [ "${action}" == "start" ]; then + option="yes" + else + option="no" + fi + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +-${SECRET_PEM} root@${new_node} "/usr/libexec/ganesha/ganesha-ha.sh --setup-ganesha-conf-files $HA_CONFDIR $option" ++${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option" + +- if [ "$SERVICE_MAN" == "/usr/bin/systemctl" ] ++ if [ "${SERVICE_MAN}" == "/bin/systemctl" ] + then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +-${SECRET_PEM} root@${new_node} "$SERVICE_MAN ${action} nfs-ganesha" ++${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha" + else + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +-${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}" ++${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}" + fi + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index fac16e6..81f794d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -122,12 +122,9 @@ sc_service_action(struct service_command *sc, char *command) + static int + manage_service(char *action) + { +- struct stat stbuf = { +- 0, +- }; + int i = 0; + int ret = 0; +- 
struct service_command sc_list[] = {{.binary = "/usr/bin/systemctl", ++ struct service_command sc_list[] = {{.binary = "/bin/systemctl", + .service = "nfs-ganesha", + .action = sc_systemctl_action}, + {.binary = "/sbin/invoke-rc.d", +@@ -139,15 +136,10 @@ manage_service(char *action) + {.binary = NULL}}; + + while (sc_list[i].binary != NULL) { +- ret = sys_stat(sc_list[i].binary, &stbuf); ++ ret = sys_access(sc_list[i].binary, X_OK); + if (ret == 0) { + gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary); +- if (strcmp(sc_list[i].binary, "/usr/bin/systemctl") == 0) +- ret = sc_systemctl_action(&sc_list[i], action); +- else +- ret = sc_service_action(&sc_list[i], action); +- +- return ret; ++ return sc_list[i].action(&sc_list[i], action); + } + i++; + } +@@ -449,7 +441,7 @@ manage_export_config(char *volname, char *value, char **op_errstr) + + GF_ASSERT(volname); + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/create-export-ganesha.sh", ++ runner_add_args(&runner, GANESHA_PREFIX "/create-export-ganesha.sh", + CONFDIR, value, volname, NULL); + ret = runner_run(&runner); + +@@ -558,8 +550,8 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) + } + + if (check_host_list()) { +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/dbus-send.sh", CONFDIR, +- value, volname, NULL); ++ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, value, ++ volname, NULL); + ret = runner_run(&runner); + if (ret) { + gf_asprintf(op_errstr, +@@ -610,8 +602,8 @@ tear_down_cluster(gf_boolean_t run_teardown) + + if (run_teardown) { + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", +- "teardown", CONFDIR, NULL); ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "teardown", ++ CONFDIR, NULL); + ret = runner_run(&runner); + /* * + * Remove all the entries in CONFDIR expect ganesha.conf and +@@ -685,7 +677,7 @@ setup_cluster(gf_boolean_t run_setup) + + if (run_setup) { + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "setup", ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "setup", + CONFDIR, NULL); + ret = runner_run(&runner); + } +@@ -714,7 +706,7 @@ teardown(gf_boolean_t run_teardown, char **op_errstr) + } + + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "cleanup", ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "cleanup", + CONFDIR, NULL); + ret = runner_run(&runner); + if (ret) +@@ -759,7 +751,7 @@ stop_ganesha(char **op_errstr) + }; + + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", + "--setup-ganesha-conf-files", CONFDIR, "no", NULL); + ret = runner_run(&runner); + if (ret) { +@@ -828,7 +820,7 @@ start_ganesha(char **op_errstr) + + if (check_host_list()) { + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", + "--setup-ganesha-conf-files", CONFDIR, "yes", NULL); + ret = runner_run(&runner); + if (ret) { +-- +1.8.3.1 + diff --git a/SOURCES/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch b/SOURCES/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch new file mode 100644 index 0000000..996e2d0 --- /dev/null +++ b/SOURCES/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch @@ -0,0 +1,40 @@ +From 16d298584c70138fd639281bc900838d7938aec9 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony 
Thottan +Date: Wed, 22 Feb 2017 14:37:04 +0530 +Subject: [PATCH 057/124] ganesha/scripts : Remove export entries from + ganesha.conf during cleanup + +Label: DOWNSTREAM ONLY + +Change-Id: I288f7c9ced23d258a7ce1242d8efe03a4bf6f746 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/16708 +Smoke: Gluster Build System +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: soumya k +Reviewed-by: Kaleb KEITHLEY +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167142 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 4b93f95..7ba80b5 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -342,6 +342,7 @@ cleanup_ganesha_config () + rm -f /etc/corosync/corosync.conf + rm -rf /etc/cluster/cluster.conf* + rm -rf /var/lib/pacemaker/cib/* ++ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf + } + + do_create_virt_ip_constraints() +-- +1.8.3.1 + diff --git a/SOURCES/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch b/SOURCES/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch new file mode 100644 index 0000000..251e78d --- /dev/null +++ b/SOURCES/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch @@ -0,0 +1,62 @@ +From 172f32058b1a7d2e42f373490853aef5dd72f02f Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Wed, 22 Feb 2017 14:20:41 +0530 +Subject: [PATCH 058/124] glusterd/ganesha : During volume delete remove the + ganesha export configuration file + +Label: DOWNSTREAM ONLY + +Change-Id: I0363e7f4d7cefd3f1b3c4f91e495767ec52e230e +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/16707 +Smoke: Gluster Build System +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: soumya k +Reviewed-by: Kaleb KEITHLEY +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167143 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 9 +++++++++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 81f794d..6d72fda 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -445,7 +445,7 @@ manage_export_config(char *volname, char *value, char **op_errstr) + CONFDIR, value, volname, NULL); + ret = runner_run(&runner); + +- if (ret) ++ if (ret && !(*op_errstr)) + gf_asprintf(op_errstr, + "Failed to create" + " NFS-Ganesha export config file."); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index a0417ca..81c668c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2936,6 +2936,15 @@ glusterd_op_delete_volume(dict_t *dict) + goto out; + } + ++ if (glusterd_check_ganesha_export(volinfo)) { ++ ret = manage_export_config(volname, "off", NULL); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, 0, ++ "Could 
not delete ganesha export conf file " ++ "for %s", ++ volname); ++ } ++ + ret = glusterd_delete_volume(volinfo); + out: + gf_msg_debug(this->name, 0, "returning %d", ret); +-- +1.8.3.1 + diff --git a/SOURCES/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch b/SOURCES/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch new file mode 100644 index 0000000..e41a178 --- /dev/null +++ b/SOURCES/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch @@ -0,0 +1,132 @@ +From 8b501d9dfbeecb3ffdc3cd11b7c74aa929356ed6 Mon Sep 17 00:00:00 2001 +From: jiffin tony thottan +Date: Mon, 7 Dec 2015 14:38:54 +0530 +Subject: [PATCH 059/124] glusterd/ganesha : throw proper error for "gluster + nfs-ganesha disable" + +For first time or if "gluster nfs-ganesha enable" fails the global option +"nfs-ganesha" won't be stored in glusterd's dictionary. In both cases the +"gluster nfs-ganesha disable" throws following error : +"nfs-ganesha: failed: nfs-ganesha is already (null)d." + +Also this patch provides the missing prompt for nfs-ganesha disable in 3.10 + +Label: DOWNSTREAM ONLY + +Change-Id: I7c9fd6dabedc0cfb14c5190b3554bc63a6bc0340 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/16791 +Smoke: Gluster Build System +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: soumya k +Reviewed-by: Kaleb KEITHLEY +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167144 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-parser.c | 33 +++++++++++++++++----------- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 22 +++++-------------- + 2 files changed, 26 insertions(+), 29 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index cd9c445..f85958b 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -5908,20 +5908,27 @@ cli_cmd_ganesha_parse(struct cli_state *state, const char **words, + goto out; + } + +- question = +- "Enabling NFS-Ganesha requires Gluster-NFS to be" +- " disabled across the trusted pool. Do you " +- "still want to continue?\n"; +- + if (strcmp(value, "enable") == 0) { +- answer = cli_cmd_get_confirmation(state, question); +- if (GF_ANSWER_NO == answer) { +- gf_log("cli", GF_LOG_ERROR, +- "Global operation " +- "cancelled, exiting"); +- ret = -1; +- goto out; +- } ++ question = ++ "Enabling NFS-Ganesha requires Gluster-NFS to be " ++ "disabled across the trusted pool. Do you " ++ "still want to continue?\n"; ++ } else if (strcmp(value, "disable") == 0) { ++ question = ++ "Disabling NFS-Ganesha will tear down the entire " ++ "ganesha cluster across the trusted pool. Do you " ++ "still want to continue?\n"; ++ } else { ++ ret = -1; ++ goto out; ++ } ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "Global operation " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; + } + cli_out("This will take a few minutes to complete. 
Please wait .."); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 6d72fda..1d17a33 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -252,8 +252,7 @@ int + glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) + { + int ret = -1; +- int value = -1; +- gf_boolean_t option = _gf_false; ++ char *value = NULL; + char *str = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; +@@ -264,8 +263,8 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) + priv = this->private; + GF_ASSERT(priv); + +- value = dict_get_str_boolean(dict, "value", _gf_false); +- if (value == -1) { ++ ret = dict_get_str(dict, "value", &value); ++ if (value == NULL) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "value not present."); + goto out; +@@ -273,22 +272,13 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) + /* This dict_get will fail if the user had never set the key before */ + /*Ignoring the ret value and proceeding */ + ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str); +- if (ret == -1) { +- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_GET_FAILED, +- "Global dict not present."); +- ret = 0; +- goto out; +- } +- /* Validity of the value is already checked */ +- ret = gf_string2boolean(str, &option); +- /* Check if the feature is already enabled, fail in that case */ +- if (value == option) { +- gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", str); ++ if (str ? strcmp(value, str) == 0 : strcmp(value, "disable") == 0) { ++ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", value); + ret = -1; + goto out; + } + +- if (value) { ++ if (strcmp(value, "enable")) { + ret = start_ganesha(op_errstr); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL, +-- +1.8.3.1 + diff --git a/SOURCES/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch b/SOURCES/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch new file mode 100644 index 0000000..39202ca --- /dev/null +++ b/SOURCES/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch @@ -0,0 +1,61 @@ +From 93635333d17a03078a6bf72771445e1bd9ebdc15 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Thu, 2 Mar 2017 12:22:30 +0530 +Subject: [PATCH 060/124] ganesha/scripts : Stop ganesha process on all nodes + if cluster setup fails + +During staging phase of volume option "nfs-ganesha", symlink "ganesha.conf" +will be created plus ganesha process will be started. The cluster setup +happens during commit phase of that option. So if cluster set up fails, the +ganesha process will be running on all cluster nodes. 
+ +Label: DOWNSTREAM ONLY + +Change-Id: Ib2cb85364b7ef5b702acb4826ffdf8e6f31a2acd +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/16823 +Smoke: Gluster Build System +Tested-by: Kaleb KEITHLEY +Reviewed-by: soumya k +Reviewed-by: Kaleb KEITHLEY +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167145 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 7ba80b5..db3f921 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -175,6 +175,13 @@ determine_servers() + fi + } + ++stop_ganesha_all() ++{ ++ local serverlist=${1} ++ for node in ${serverlist} ; do ++ manage_service "stop" ${node} ++ done ++} + + setup_cluster() + { +@@ -191,6 +198,8 @@ setup_cluster() + pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers} + if [ $? -ne 0 ]; then + logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" ++ # setup failed; stop all ganesha processes and clean up symlinks in cluster ++ stop_ganesha_all ${servers} + exit 1; + fi + pcs cluster start --all +-- +1.8.3.1 + diff --git a/SOURCES/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch b/SOURCES/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch new file mode 100644 index 0000000..610c471 --- /dev/null +++ b/SOURCES/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch @@ -0,0 +1,106 @@ +From a766878e11a984680ed29f13aae713d464ec985e Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Wed, 19 Apr 2017 16:12:10 +0530 +Subject: [PATCH 061/124] ganesha : allow refresh-config and volume + export/unexport in failover state + +If ganesha is not running on one of the nodes in the HA cluster, then all dbus +commands sent to that ganesha server will fail. This results in both +refresh-config and volume export/unexport failures. This change will +gracefully handle those scenarios. + +Label: DOWNSTREAM ONLY + +Change-Id: I3f1b7b7ca98e54c273c266e56357d8e24dd1b14b +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/17081 +Smoke: Gluster Build System +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: soumya k +Reviewed-by: Kaleb KEITHLEY +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167146 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 6 ++---- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 24 +++++++++++++++--------- + xlators/mgmt/glusterd/src/glusterd-messages.h | 2 +- + 3 files changed, 18 insertions(+), 14 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index db3f921..f040ef6 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -275,8 +275,7 @@ string:\"EXPORT(Export_Id=$export_id)\" 2>&1") + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ]; then +- echo "Error: refresh-config failed on ${current_host}." +- exit 1 ++ echo "Refresh-config failed on ${current_host}" + else + echo "Refresh-config completed on ${current_host}." 
+ fi +@@ -297,8 +296,7 @@ string:"EXPORT(Export_Id=$export_id)" 2>&1) + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ] ; then +- echo "Error: refresh-config failed on localhost." +- exit 1 ++ echo "Refresh-config failed on localhost." + else + echo "Success: refresh-config completed." + fi +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 1d17a33..ee8b588 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -540,15 +540,21 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) + } + + if (check_host_list()) { +- runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, value, +- volname, NULL); +- ret = runner_run(&runner); +- if (ret) { +- gf_asprintf(op_errstr, +- "Dynamic export" +- " addition/deletion failed." +- " Please see log file for details"); +- goto out; ++ /* Check whether ganesha is running on this node */ ++ if (manage_service("status")) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GANESHA_NOT_RUNNING, ++ "Export failed, NFS-Ganesha is not running"); ++ } else { ++ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, ++ value, volname, NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "Dynamic export" ++ " addition/deletion failed." ++ " Please see log file for details"); ++ goto out; ++ } + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h +index 9558480..c7b3ca8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-messages.h ++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h +@@ -298,6 +298,6 @@ GLFS_MSGID( + GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE, + GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL, + GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED, +- GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL); ++ GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL); + + #endif /* !_GLUSTERD_MESSAGES_H_ */ +-- +1.8.3.1 + diff --git a/SOURCES/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch b/SOURCES/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch new file mode 100644 index 0000000..71b4416 --- /dev/null +++ b/SOURCES/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch @@ -0,0 +1,59 @@ +From eb784a40a4f72e347945e0d66ac1a28389bb076c Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Fri, 28 Apr 2017 17:27:46 +0530 +Subject: [PATCH 062/124] glusterd/ganesha : perform removal of ganesha.conf on + nodes only in ganesha cluster + +Label: DOWNSTREAM ONLY + +Change-Id: I864ecd9391adf80fb1fa6ad2f9891a9ce77135e7 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/17138 +Smoke: Gluster Build System +Reviewed-by: soumya k +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: Kaleb KEITHLEY +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167147 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index ee8b588..b743216 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ 
-746,17 +746,16 @@ stop_ganesha(char **op_errstr) + 0, + }; + +- runinit(&runner); +- runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", +- "--setup-ganesha-conf-files", CONFDIR, "no", NULL); +- ret = runner_run(&runner); +- if (ret) { +- gf_asprintf(op_errstr, +- "removal of symlink ganesha.conf " +- "in /etc/ganesha failed"); +- } +- + if (check_host_list()) { ++ runinit(&runner); ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", ++ "--setup-ganesha-conf-files", CONFDIR, "no", NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "removal of symlink ganesha.conf " ++ "in /etc/ganesha failed"); ++ } + ret = manage_service("stop"); + if (ret) + gf_asprintf(op_errstr, +-- +1.8.3.1 + diff --git a/SOURCES/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch b/SOURCES/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch new file mode 100644 index 0000000..7bbd920 --- /dev/null +++ b/SOURCES/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch @@ -0,0 +1,144 @@ +From e5450c639915f4c29ae2ad480e4128b5845254cc Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Tue, 25 Apr 2017 16:36:40 +0530 +Subject: [PATCH 063/124] glusterd/ganesha : update cache invalidation properly + during volume stop + +As per current code, during volume stop for ganesha enabled volume the +feature.cache-invalidation was turned "off" in ganesha_manage_export(). +And it never turn back to "on" when volume is started. It is not desire +to modify the volume options during stop, this patch fixes above mentioned +issue. + +Label: DOWNSTREAM ONLY + +Change-Id: Iea9c62e5cda4f54805b41ea6055cf0c3652a634c +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/17111 +Smoke: Gluster Build System +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: Kaleb KEITHLEY +Reviewed-by: Raghavendra Talur +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167148 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 33 ++++++++++++++----------- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 +-- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +- + xlators/mgmt/glusterd/src/glusterd.h | 3 ++- + 4 files changed, 23 insertions(+), 19 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index b743216..1c2ba7a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -445,7 +445,8 @@ manage_export_config(char *volname, char *value, char **op_errstr) + + /* Exports and unexports a particular volume via NFS-Ganesha */ + int +-ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) ++ganesha_manage_export(dict_t *dict, char *value, ++ gf_boolean_t update_cache_invalidation, char **op_errstr) + { + runner_t runner = { + 0, +@@ -558,19 +559,21 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) + } + } + +- vol_opts = volinfo->dict; +- ret = dict_set_dynstr_with_alloc(vol_opts, "features.cache-invalidation", +- value); +- if (ret) +- gf_asprintf(op_errstr, +- "Cache-invalidation could not" +- " be set to %s.", +- value); +- ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); +- if (ret) +- gf_asprintf(op_errstr, "failed to store volinfo for %s", +- volinfo->volname); +- ++ if 
(update_cache_invalidation) { ++ vol_opts = volinfo->dict; ++ ret = dict_set_dynstr_with_alloc(vol_opts, ++ "features.cache-invalidation", value); ++ if (ret) ++ gf_asprintf(op_errstr, ++ "Cache-invalidation could not" ++ " be set to %s.", ++ value); ++ ret = glusterd_store_volinfo(volinfo, ++ GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) ++ gf_asprintf(op_errstr, "failed to store volinfo for %s", ++ volinfo->volname); ++ } + out: + return ret; + } +@@ -867,7 +870,7 @@ glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key, + GF_ASSERT(value); + + if (strcmp(key, "ganesha.enable") == 0) { +- ret = ganesha_manage_export(dict, value, op_errstr); ++ ret = ganesha_manage_export(dict, value, _gf_true, op_errstr); + if (ret < 0) + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index a630c48..52809a8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -1178,7 +1178,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr) + + if ((strcmp(key, "ganesha.enable") == 0) && + (strcmp(value, "off") == 0)) { +- ret = ganesha_manage_export(dict, "off", op_errstr); ++ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr); + if (ret) + goto out; + } +@@ -1691,7 +1691,7 @@ glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr) + */ + if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) { + if (glusterd_check_ganesha_export(volinfo)) { +- ret = ganesha_manage_export(dict, "off", op_errstr); ++ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL, + "Could not reset ganesha.enable key"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 81c668c..de4eccb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1825,7 +1825,7 @@ glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr) + + ret = glusterd_check_ganesha_export(volinfo); + if (ret) { +- ret = ganesha_manage_export(dict, "off", op_errstr); ++ ret = ganesha_manage_export(dict, "off", _gf_false, op_errstr); + if (ret) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, + GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL, +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 5135181..e858ce4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -1368,7 +1368,8 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr); + int + glusterd_op_set_ganesha(dict_t *dict, char **errstr); + int +-ganesha_manage_export(dict_t *dict, char *value, char **op_errstr); ++ganesha_manage_export(dict_t *dict, char *value, ++ gf_boolean_t update_cache_invalidation, char **op_errstr); + gf_boolean_t + glusterd_is_ganesha_cluster(); + gf_boolean_t +-- +1.8.3.1 + diff --git a/SOURCES/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch b/SOURCES/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch new file mode 100644 index 0000000..042e1c0 --- /dev/null +++ b/SOURCES/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch @@ -0,0 +1,52 @@ +From 37bf4daca164cfcb260760ee2fd25d66f920dc7f Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Wed, 22 Feb 2017 18:26:30 +0530 +Subject: [PATCH 064/124] glusterd/ganesha : return proper value in pre_setup() + +Label: DOWNSTREAM ONLY + 
+Change-Id: I6f7ce82488904c7d418ee078162f26f1ec81e9d9 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/16733 +Smoke: Gluster Build System +Reviewed-by: Atin Mukherjee +Reviewed-by: Raghavendra Talur +Tested-by: Raghavendra Talur +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167149 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 1c2ba7a..d9fdfc6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -843,16 +843,15 @@ pre_setup(gf_boolean_t run_setup, char **op_errstr) + { + int ret = 0; + +- ret = check_host_list(); +- +- if (ret) { ++ if (check_host_list()) { + ret = setup_cluster(run_setup); + if (ret == -1) + gf_asprintf(op_errstr, + "Failed to set up HA " + "config for NFS-Ganesha. " + "Please check the log file for details"); +- } ++ } else ++ ret = -1; + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0065-ganesha-scripts-remove-dependency-over-export-config.patch b/SOURCES/0065-ganesha-scripts-remove-dependency-over-export-config.patch new file mode 100644 index 0000000..4c99ef4 --- /dev/null +++ b/SOURCES/0065-ganesha-scripts-remove-dependency-over-export-config.patch @@ -0,0 +1,58 @@ +From 7a47c004b907ed5469b78d559cae6d151e4d626b Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Thu, 23 Feb 2017 16:21:52 +0530 +Subject: [PATCH 065/124] ganesha/scripts : remove dependency over export + configuration file for unexport + +Currently unexport is performed by reading the export id from the volume +configuration file, so unexport depends on that file. This patch performs the +unexport with the help of the dbus command ShowExports, and it will only +unexport the share which was added via the cli. + +Label: DOWNSTREAM ONLY + +Change-Id: I6f3c9b2bb48f0328b18e9cc0e4b9356174afd596 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/16771 +Smoke: Gluster Build System +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: Kaleb KEITHLEY +Reviewed-by: Raghavendra Talur +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167150 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/dbus-send.sh | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh +index ec8d948..9d613a0 100755 +--- a/extras/ganesha/scripts/dbus-send.sh ++++ b/extras/ganesha/scripts/dbus-send.sh +@@ -41,8 +41,18 @@ string:"EXPORT(Path=/$VOL)" + #This function removes an export dynamically(uses the export_id of the export) + function dynamic_export_remove() + { +- removed_id=`cat $GANESHA_DIR/exports/export.$VOL.conf |\ +-grep Export_Id | awk -F"[=,;]" '{print$2}'| tr -d '[[:space:]]'` ++ # The bash below fetches all the exports from the ShowExports command and ++ # searches for the export entry based on its path, then picks out its id. ++ # There are two possibilities for the path: either the entire volume is ++ # exported or a subdir. It handles both cases,
but it removes only the first ++ # entry from the list, on the assumption that the entry exported via the cli ++ # has the lowest export id value ++ removed_id=$(dbus-send --type=method_call --print-reply --system \ ++ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ ++ org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \ ++ "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \ ++ | head -1) ++ + dbus-send --print-reply --system \ + --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ + org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id +-- +1.8.3.1 + diff --git a/SOURCES/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch b/SOURCES/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch new file mode 100644 index 0000000..187b97c --- /dev/null +++ b/SOURCES/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch @@ -0,0 +1,41 @@ +From d91eadbbb3e2d02e7297214da394b0e232544386 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Tue, 2 May 2017 14:06:00 +0530 +Subject: [PATCH 066/124] glusterd/ganesha : add proper NULL check in + manage_export_config + +Label: DOWNSTREAM ONLY + +Change-Id: I872b2b6b027f04e61f60ad85588f50e1ef2f988c +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/17150 +Smoke: Gluster Build System +Reviewed-by: soumya k +NetBSD-regression: NetBSD Build System +Reviewed-by: Kaleb KEITHLEY +CentOS-regression: Gluster Build System +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167151 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index d9fdfc6..fe0bffc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -435,7 +435,7 @@ manage_export_config(char *volname, char *value, char **op_errstr) + CONFDIR, value, volname, NULL) + ret = runner_run(&runner); + +- if (ret && !(*op_errstr)) ++ if (ret && op_errstr) + gf_asprintf(op_errstr, + "Failed to create" + " NFS-Ganesha export config file."); +-- +1.8.3.1 + diff --git a/SOURCES/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch b/SOURCES/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch new file mode 100644 index 0000000..233725e --- /dev/null +++ b/SOURCES/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch @@ -0,0 +1,41 @@ +From 1e5c6bb28894a57e5ca5ed7b4b3b5e05efecf7cd Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Wed, 3 May 2017 12:47:14 +0530 +Subject: [PATCH 067/124] ganesha : minor improvements for commit e91cdf4 + (17081) + +Label: DOWNSTREAM ONLY + +Change-Id: I3af13e081c5e46cc6f2c132e7a5106ac3355c850 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://review.gluster.org/17152 +Smoke: Gluster Build System +Reviewed-by: soumya k +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Reviewed-by: Kaleb KEITHLEY +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167152 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 
f040ef6..cedc3fa 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -275,7 +275,7 @@ string:\"EXPORT(Export_Id=$export_id)\" 2>&1") + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ]; then +- echo "Refresh-config failed on ${current_host}" ++ echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}" + else + echo "Refresh-config completed on ${current_host}." + fi +-- +1.8.3.1 + diff --git a/SOURCES/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch b/SOURCES/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch new file mode 100644 index 0000000..3658ec1 --- /dev/null +++ b/SOURCES/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch @@ -0,0 +1,58 @@ +From aabc623f99d22a2a9e1d52f3ca7de1dc5b49946d Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Tue, 13 Jun 2017 07:36:50 -0400 +Subject: [PATCH 068/124] common-ha: surviving ganesha.nfsd not put in grace on + fail-over + +Behavior change is seen in new HA in RHEL 7.4 Beta. Up to now clone +RAs have been created with "pcs resource create ... meta notify=true". +Their notify method is invoked with pre-start or post-stop when one of +the clone RAs is started or stopped. + +In 7.4 Beta the notify method we observe that the notify method is not +invoked when one of the clones is stopped (or started). + +Ken Gaillot, one of the pacemaker devs, wrote: + With the above command, pcs puts the notify=true meta-attribute + on the primitive instead of the clone. Looking at the pcs help, + that seems expected (--clone notify=true would put it on the clone, + meta notify=true puts it on the primitive). If you drop the "meta" + above, I think it will work again. + +And indeed his suggested fix does work on both RHEL 7.4 Beta and RHEL +7.3 and presumably Fedora. + +Label: DOWNSTREAM ONLY + +Change-Id: Idbb539f1366df6d39f77431c357dff4e53a2df6d +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://review.gluster.org/17534 +Smoke: Gluster Build System +Reviewed-by: soumya k +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167153 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index cedc3fa..537c965 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -445,7 +445,7 @@ setup_create_resources() + # ganesha-active crm_attribute + sleep 5 + +- pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true ++ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true + if [ $? -ne 0 ]; then + logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed" + fi +-- +1.8.3.1 + diff --git a/SOURCES/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch b/SOURCES/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch new file mode 100644 index 0000000..16aea73 --- /dev/null +++ b/SOURCES/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch @@ -0,0 +1,96 @@ +From 916a79ea78db264ceedd4ebdba794e488b82eceb Mon Sep 17 00:00:00 2001 +From: "Kaleb S. 
KEITHLEY" +Date: Wed, 21 Jun 2017 10:01:20 -0400 +Subject: [PATCH 069/124] common-ha: enable and disable selinux + ganesha_use_fusefs + +Starting in Fedora 26 and RHEL 7.4 there are new targeted policies +in selinux which include a tuneable to allow ganesha.nfsd to access +the gluster (FUSE) shared_storage volume where ganesha maintains its +state. + +N.B. rpm doesn't have a way to distinguish between RHEL 7.3 or 7.4 +so it can't be enabled for RHEL at this time. /usr/sbin/semanage is +in policycoreutils-python in RHEL (versus policycoreutils-python-utils +in Fedora.) Once RHEL 7.4 GAs we may also wish to specify the version +for RHEL 7 explicitly, i.e. + Requires: selinux-policy >= 3.13.1-160. +But beware, the corresponding version in Fedora 26 seems to be +selinux-policy-3.13.1.258 or so. (Maybe earlier versions, but that's +what's currently in the F26 beta. + +release-3.10 is the upstream master branch for glusterfs-ganesha. For +release-3.11 and later storhaug needs a similar change, which is +tracked by https://github.com/linux-ha-storage/storhaug/issues/11 + +Maybe at some point we would want to consider migrating the targeted +policies for glusterfs (and nfs-ganesha) from selinux-policy to a +glusterfs-selinux (and nfs-ganesha-selinux) subpackage? + +Label: DOWNSTREAM ONLY + +Change-Id: I04a5443edd00636cbded59a2baddfa98095bf7ac +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://review.gluster.org/17597 +Smoke: Gluster Build System +Reviewed-by: Niels de Vos +Reviewed-by: jiffin tony Thottan +CentOS-regression: Gluster Build System +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167154 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index d748ebc..b01c94f 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -466,6 +466,11 @@ Requires: pcs, dbus + Requires: cman, pacemaker, corosync + %endif + ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++Requires(post): policycoreutils-python-utils ++Requires(postun): policycoreutils-python-utils ++%endif ++ + %description ganesha + GlusterFS is a distributed file-system capable of scaling to several + petabytes. It aggregates various storage bricks over Infiniband RDMA +@@ -923,6 +928,14 @@ exit 0 + %systemd_post glustereventsd + %endif + ++%if ( 0%{!?_without_server:1} ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%post ganesha ++semanage boolean -m ganesha_use_fusefs --on ++exit 0 ++%endif ++%endif ++ + %if ( 0%{!?_without_georeplication:1} ) + %post geo-replication + if [ $1 -ge 1 ]; then +@@ -1055,6 +1068,14 @@ fi + exit 0 + %endif + ++%if ( 0%{!?_without_server:1} ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%postun ganesha ++semanage boolean -m ganesha_use_fusefs --off ++exit 0 ++%endif ++%endif ++ + ##----------------------------------------------------------------------------- + ## All %%files should be placed here and keep them grouped + ## +-- +1.8.3.1 + diff --git a/SOURCES/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch b/SOURCES/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch new file mode 100644 index 0000000..6715f1f --- /dev/null +++ b/SOURCES/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch @@ -0,0 +1,76 @@ +From f410cd9f9b9455373a9612423558d8d0f83cd0fc Mon Sep 17 00:00:00 2001 +From: "Kaleb S. 
KEITHLEY" +Date: Wed, 12 Jul 2017 07:43:51 -0400 +Subject: [PATCH 070/124] packaging: glusterfs-ganesha update sometimes fails + semanage + +Depending on how dnf orders updates, the updated version of +selinux-policy-targeted with ganesha_use_fusefs may not be updated +before the glusterfs-ganesha update execute its %post scriptlet +containing the `semanage ganesha_use_fusefs ...` command. In such +situations the semanage command (silently) fails. + +Use a %trigger (and %triggerun) to run the scriptlet (again) after +selinux-policy-targeted with ganesha_use_fusefs has been installed +or updated. + +Note: the %triggerun is probably unnecessary, but it doesn't hurt. + +The release-3.10 branch is the "upstream master" for the glusterfs- +ganesha subpackage. + +Note: to be merged after https://review.gluster.org/17806 + +Label: DOWNSTREAM ONLY + +Change-Id: I1ad06d79fa1711e4abf038baf9f0a5b7bb665934 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://review.gluster.org/17756 +Smoke: Gluster Build System +CentOS-regression: Gluster Build System +Reviewed-by: Niels de Vos +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167155 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index b01c94f..1d99a3d 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1077,6 +1077,28 @@ exit 0 + %endif + + ##----------------------------------------------------------------------------- ++## All %%trigger should be placed here and keep them sorted ++## ++%if ( 0%{!?_without_server:1} ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%trigger ganesha -- selinux-policy-targeted ++semanage boolean -m ganesha_use_fusefs --on ++exit 0 ++%endif ++%endif ++ ++##----------------------------------------------------------------------------- ++## All %%triggerun should be placed here and keep them sorted ++## ++%if ( 0%{!?_without_server:1} ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%triggerun ganesha -- selinux-policy-targeted ++semanage boolean -m ganesha_use_fusefs --off ++exit 0 ++%endif ++%endif ++ ++##----------------------------------------------------------------------------- + ## All %%files should be placed here and keep them grouped + ## + %files +-- +1.8.3.1 + diff --git a/SOURCES/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch b/SOURCES/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch new file mode 100644 index 0000000..ad14a89 --- /dev/null +++ b/SOURCES/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch @@ -0,0 +1,66 @@ +From 662c94f3b3173bf78465644e2e42e03efd9ea493 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Mon, 17 Jul 2017 11:07:40 -0400 +Subject: [PATCH 071/124] common-ha: enable and disable selinux + gluster_use_execmem + +Starting in Fedora 26 and RHEL 7.4 there are new targeted policies in +selinux which include a tuneable to allow glusterd->ganesha-ha.sh->pcs +to access the pcs config, i.e. gluster-use-execmem. + +Note. rpm doesn't have a way to distinguish between RHEL 7.3 or 7.4 +or between 3.13.1-X and 3.13.1-Y so it can't be enabled for RHEL at +this time. + +/usr/sbin/semanage is in policycoreutils-python in RHEL (versus +policycoreutils-python-utils in Fedora.) + +Requires selinux-policy >= 3.13.1-160 in RHEL7. 
The corresponding +version in Fedora 26 seems to be selinux-policy-3.13.1-259 or so. (Maybe +earlier versions, but that's what was in F26 when I checked.) + +Label: DOWNSTREAM ONLY + +Change-Id: Ic474b3f7739ff5be1e99d94d00b55caae4ceb5a0 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://review.gluster.org/17806 +Smoke: Gluster Build System +CentOS-regression: Gluster Build System +Reviewed-by: soumya k +Reviewed-by: Atin Mukherjee +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167156 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 537c965..f4400af 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -984,6 +984,9 @@ main() + usage + exit 0 + fi ++ ++ semanage boolean -m gluster_use_execmem --on ++ + HA_CONFDIR=${1%/}; shift + local ha_conf=${HA_CONFDIR}/ganesha-ha.conf + local node="" +@@ -1129,6 +1132,9 @@ $HA_CONFDIR/ganesha-ha.conf + ;; + + esac ++ ++ semanage boolean -m gluster_use_execmem --off ++ + } + + main $* +-- +1.8.3.1 + diff --git a/SOURCES/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch b/SOURCES/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch new file mode 100644 index 0000000..4bf730b --- /dev/null +++ b/SOURCES/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch @@ -0,0 +1,60 @@ +From c147bbec10fc72b85301ab6a7580f15713b8a974 Mon Sep 17 00:00:00 2001 +From: Ambarish +Date: Tue, 12 Sep 2017 18:34:29 +0530 +Subject: [PATCH 072/124] ganesha-ha: don't set SELinux booleans if SELinux is + disabled + +semanage commands inside ganesha-ha.sh script will fail if selinux is +Disabled. This patch introduces a check if selinux is enabled or not, +and subsequently run semange commands only on selinux enabled systems. 
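+
+As a quick sanity check, both conditions can be verified by hand
+(illustrative commands, not part of this change):
+
+    selinuxenabled; echo $?          # exits 0 only when SELinux is enabled
+    getsebool gluster_use_execmem    # prints the boolean's current state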
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ibee61cbb1d51a73e6c326b49bac5c7ce06feb310
+Signed-off-by: Ambarish
+Reviewed-on: https://review.gluster.org/18264
+Reviewed-by: Niels de Vos
+Smoke: Gluster Build System
+Reviewed-by: Kaleb KEITHLEY
+Reviewed-by: jiffin tony Thottan
+Reviewed-by: Daniel Gryniewicz
+CentOS-regression: Gluster Build System
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167157
+Reviewed-by: Soumya Koduri
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index f4400af..e1d3ea0 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -985,7 +985,9 @@ main()
+ exit 0
+ fi
+
+- semanage boolean -m gluster_use_execmem --on
++ if (selinuxenabled) ;then
++ semanage boolean -m gluster_use_execmem --on
++ fi
+
+ HA_CONFDIR=${1%/}; shift
+ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+@@ -1133,8 +1135,9 @@ $HA_CONFDIR/ganesha-ha.conf
+
+ esac
+
+- semanage boolean -m gluster_use_execmem --off
+-
++ if (selinuxenabled) ;then
++ semanage boolean -m gluster_use_execmem --off
++ fi
+ }
+
+ main $*
+--
+1.8.3.1
+
diff --git a/SOURCES/0073-build-remove-ganesha-dependency-on-selinux-policy.patch b/SOURCES/0073-build-remove-ganesha-dependency-on-selinux-policy.patch
new file mode 100644
index 0000000..4d16042
--- /dev/null
+++ b/SOURCES/0073-build-remove-ganesha-dependency-on-selinux-policy.patch
@@ -0,0 +1,45 @@
+From 52279c877264f41b522f747a986b937e6f054e2a Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY"
+Date: Fri, 23 Jun 2017 20:43:16 +0530
+Subject: [PATCH 073/124] build: remove ganesha dependency on selinux-policy
+
+Problem:
+Puddle creation fails with unresolved dependencies
+ unresolved deps:
+*** selinux-policy >= 0:3.13.1-160
+
+Solution:
+We know a priori that the version in RHEL 7.4 is already the desired
+version. So removing this explicit dependency *should* not be a gluster
+test blocker.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Id53ac0e41adc14704932787ee0dd3143e6615aaf
+Signed-off-by: Milind Changire
+Reviewed-on: https://code.engineering.redhat.com/gerrit/109945
+Reviewed-by: Kaleb Keithley
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167158
+Reviewed-by: Soumya Koduri
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ glusterfs.spec.in | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 1d99a3d..e55e255 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -467,6 +467,7 @@ Requires: cman, pacemaker, corosync
+ %endif
+
+ %if ( 0%{?fedora} && 0%{?fedora} > 25 )
++Requires: selinux-policy >= 3.13.1-160
+ Requires(post): policycoreutils-python-utils
+ Requires(postun): policycoreutils-python-utils
+ %endif
+--
+1.8.3.1
+
diff --git a/SOURCES/0074-common-ha-enable-pacemaker-at-end-of-setup.patch b/SOURCES/0074-common-ha-enable-pacemaker-at-end-of-setup.patch
new file mode 100644
index 0000000..6366f0c
--- /dev/null
+++ b/SOURCES/0074-common-ha-enable-pacemaker-at-end-of-setup.patch
@@ -0,0 +1,67 @@
+From bfbda24746bf11573b485baf534a5cf1373c6c89 Mon Sep 17 00:00:00 2001
+From: "Kaleb S.
KEITHLEY" +Date: Wed, 7 Jun 2017 08:15:48 -0400 +Subject: [PATCH 074/124] common-ha: enable pacemaker at end of setup + +Label: DOWNSTREAM ONLY + +Change-Id: I3ccd59b67ed364bfc5d27e88321ab5b9f8d471fd +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/108431 +Reviewed-by: Soumya Koduri +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167159 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index e1d3ea0..d7dfb87 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -787,6 +787,22 @@ setup_state_volume() + } + + ++enable_pacemaker() ++{ ++ while [[ ${1} ]]; do ++ if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then ++${SECRET_PEM} root@${1} ${SERVICE_MAN} enable pacemaker" ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" ++ else ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable" ++ fi ++ shift ++ done ++} ++ ++ + addnode_state_volume() + { + local newnode=${1}; shift +@@ -1011,6 +1027,8 @@ main() + + if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then + ++ determine_service_manager ++ + setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" + + setup_create_resources ${HA_SERVERS} +@@ -1019,6 +1037,8 @@ main() + + setup_state_volume ${HA_SERVERS} + ++ enable_pacemaker ${HA_SERVERS} ++ + else + + logger "insufficient servers for HA, aborting" +-- +1.8.3.1 + diff --git a/SOURCES/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch b/SOURCES/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch new file mode 100644 index 0000000..b524cfe --- /dev/null +++ b/SOURCES/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch @@ -0,0 +1,43 @@ +From 0a124b59c662c8f85fe6d184b839cbfe29d5e8ab Mon Sep 17 00:00:00 2001 +From: Soumya Koduri +Date: Wed, 14 Jun 2017 15:20:22 +0530 +Subject: [PATCH 075/124] common-ha: Fix an incorrect syntax during setup + +There was an invalid line introduced as part of +https://code.engineering.redhat.com/gerrit/#/c/108431/ + +Detected by rpmdiff - + https://errata.devel.redhat.com/rpmdiff/show/175336?result_id=4796901 + +This change is to fix the same. 
+ +Label: DOWNSTREAM ONLY + +Change-Id: I55cdd7d866cb175fb620dbbd2d02c36eab291a74 +Signed-off-by: Soumya Koduri +Reviewed-on: https://code.engineering.redhat.com/gerrit/109017 +Reviewed-by: Kaleb Keithley +Tested-by: Kaleb Keithley +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167160 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index d7dfb87..8302c5e 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -791,7 +791,6 @@ enable_pacemaker() + { + while [[ ${1} ]]; do + if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then +-${SECRET_PEM} root@${1} ${SERVICE_MAN} enable pacemaker" + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ + ${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" + else +-- +1.8.3.1 + diff --git a/SOURCES/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch b/SOURCES/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch new file mode 100644 index 0000000..4147a46 --- /dev/null +++ b/SOURCES/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch @@ -0,0 +1,44 @@ +From a917a989232d2c72752f8a2cf27bad90b5acb83d Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Tue, 27 Feb 2018 15:35:30 +0530 +Subject: [PATCH 076/124] glusterd/ganesha : change voltype for ganesha.enable + in volume option table + +The voltype defined for ganesha.enable is features/ganesha. But ganesha xlator +was removed from client stack long back. Now it is defined as part of glusterd. +So reflecting the same on the volume option table. 
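+
+For context, this table entry sits behind the user-facing setting, which
+is exercised with (illustrative command; <volname> is a placeholder):
+
+    gluster volume set <volname> ganesha.enable on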
+
+Label: DOWNSTREAM ONLY
+
+Upstream reference :
+>patch link https://review.gluster.org/19639
+>Change-Id: Ifedd7493020b77bd54edfdbdd9c799d93b24d0aa
+>BUG: 1486542
+>Signed-off-by: Jiffin Tony Thottan
+
+Change-Id: Ifedd7493020b77bd54edfdbdd9c799d93b24d0aa
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167161
+Reviewed-by: Soumya Koduri
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 13f423a..c8f6e67 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2599,7 +2599,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ },
+ {
+ .key = "ganesha.enable",
+- .voltype = "features/ganesha",
++ .voltype = "mgmt/ganesha",
+ .value = "off",
+ .option = "ganesha.enable",
+ .op_version = GD_OP_VERSION_3_7_0,
+--
+1.8.3.1
+
diff --git a/SOURCES/0077-glusterd-ganesha-create-remove-export-file-only-from.patch b/SOURCES/0077-glusterd-ganesha-create-remove-export-file-only-from.patch
new file mode 100644
index 0000000..139a8dd
--- /dev/null
+++ b/SOURCES/0077-glusterd-ganesha-create-remove-export-file-only-from.patch
@@ -0,0 +1,73 @@
+From 1e619b95e3f03e226fef135bfaeeca9b069eb978 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan
+Date: Wed, 14 Mar 2018 12:01:30 +0530
+Subject: [PATCH 077/124] glusterd/ganesha : create/remove export file only
+ from the node which performs ganesha.enable
+
+As part of volume set ganesha.enable on, the ganesha export configuration
+file will be created/removed using "create-export-ganesha.sh". This is
+performed from the nodes which are part of the ganesha cluster. But it is
+not needed, since the file is saved in shared storage and consumed by the
+nodes in the ganesha cluster.
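+
+For illustration, the generated export file lives on the shared storage
+volume, so every node reads the same copy; the usual location is shown
+below (the exact path is an assumption and depends on the shared-storage
+mount point):
+
+    /var/run/gluster/shared_storage/nfs-ganesha/exports/export.<volname>.conf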
+ +Label: DOWNSTREAM ONLY + +Upstream Reference : +>patch link : https://review.gluster.org/#/c/19716/ +>Change-Id: I2583899972b47d451a90711940293004a3af4690 +>BUG: 1555195 +>Signed-off-by: Jiffin Tony Thottan + +Change-Id: I2583899972b47d451a90711940293004a3af4690 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167162 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 3 ++- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +- + 3 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index fe0bffc..ff36476 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -530,7 +530,7 @@ ganesha_manage_export(dict_t *dict, char *value, + * Create the export file from the node where ganesha.enable "on" + * is executed + * */ +- if (option) { ++ if (option && is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "on", op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL, +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 52809a8..10e2d48 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2415,7 +2415,8 @@ glusterd_op_reset_volume(dict_t *dict, char **op_rspstr) + } + + if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) { +- if (glusterd_check_ganesha_export(volinfo)) { ++ if (glusterd_check_ganesha_export(volinfo) && ++ is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "off", op_rspstr); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL, +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index de4eccb..1ea8ba6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2936,7 +2936,7 @@ glusterd_op_delete_volume(dict_t *dict) + goto out; + } + +- if (glusterd_check_ganesha_export(volinfo)) { ++ if (glusterd_check_ganesha_export(volinfo) && is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "off", NULL); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, 0, +-- +1.8.3.1 + diff --git a/SOURCES/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch b/SOURCES/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch new file mode 100644 index 0000000..fe29fc7 --- /dev/null +++ b/SOURCES/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch @@ -0,0 +1,40 @@ +From 5daff948884b1b68ffcbc6ceea3c7affdb9700f4 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Wed, 4 Apr 2018 09:29:43 +0530 +Subject: [PATCH 078/124] common-ha/scripts : pass the list of servers properly + to stop_ganesha_all() + +Label: DOWNSTREAM ONLY + +Upstream Reference : +>Change-Id: I6d92623cd9fb450d7a27f5acc61eca0b3cbc9b08 +>BUG: 1563500 +>Signed-off-by: Jiffin Tony Thottan +>Patch link : https://review.gluster.org/#/c/19816/ + +Change-Id: I6d92623cd9fb450d7a27f5acc61eca0b3cbc9b08 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167163 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + 
extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 8302c5e..4e5799f 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -199,7 +199,7 @@ setup_cluster() + if [ $? -ne 0 ]; then + logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" + #set up failed stop all ganesha process and clean up symlinks in cluster +- stop_ganesha_all ${servers} ++ stop_ganesha_all "${servers}" + exit 1; + fi + pcs cluster start --all +-- +1.8.3.1 + diff --git a/SOURCES/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch b/SOURCES/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch new file mode 100644 index 0000000..982a531 --- /dev/null +++ b/SOURCES/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch @@ -0,0 +1,93 @@ +From 7e71723a46237f13a570961054b361dc1b34ab25 Mon Sep 17 00:00:00 2001 +From: Soumya Koduri +Date: Thu, 19 Jan 2017 15:01:12 +0530 +Subject: [PATCH 079/124] common-ha: All statd related files need to be owned + by rpcuser + +Statd service is started as rpcuser by default. Hence the +files/directories needed by it under '/var/lib/nfs' should be +owned by the same user. + +Note: This change is not in mainline as the cluster-bits +are being moved to storehaug project - +http://review.gluster.org/#/c/16349/ +http://review.gluster.org/#/c/16333/ + +Label: DOWNSTREAM ONLY + +Upstream Reference : +> Change-Id: I89fd06aa9700c5ce60026ac825da7c154d9f48fd +> BUG: 1414665 +> Signed-off-by: Soumya Koduri +> Reviewed-on: http://review.gluster.org/16433 +> Reviewed-by: jiffin tony Thottan +> Smoke: Gluster Build System +> Tested-by: Kaleb KEITHLEY +> NetBSD-regression: NetBSD Build System +> CentOS-regression: Gluster Build System + +Change-Id: I89fd06aa9700c5ce60026ac825da7c154d9f48fd +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167164 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 4e5799f..4a98f32 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -756,9 +756,11 @@ setup_state_volume() + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov +@@ -768,9 +770,11 @@ setup_state_volume() + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! 
-e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+@@ -830,9 +834,11 @@ addnode_state_volume()
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+@@ -842,9 +848,11 @@ addnode_state_volume()
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+--
+1.8.3.1
+
diff --git a/SOURCES/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch b/SOURCES/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch
new file mode 100644
index 0000000..acd1d4a
--- /dev/null
+++ b/SOURCES/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch
@@ -0,0 +1,62 @@
+From c5c6720c5186741a3b01a5ba2b34633fc1a00fc5 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan
+Date: Mon, 30 Apr 2018 12:35:01 +0530
+Subject: [PATCH 080/124] glusterd/ganesha : Skip non-ganesha nodes properly
+ for ganesha HA set up
+
+Label: DOWNSTREAM ONLY
+
+Upstream reference:
+>Patch link https://review.gluster.org/#/c/19949/
+>Change-Id: Iff7bc3ead43e97847219c5a5cc8b967bf0967903
+>BUG: 1573078
+>Signed-off-by: Jiffin Tony Thottan
+
+Change-Id: Iff7bc3ead43e97847219c5a5cc8b967bf0967903
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167165
+Reviewed-by: Soumya Koduri
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 23 +++++++++++++----------
+ 1 file changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index ff36476..d882105 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -842,17 +842,20 @@ static int
+ pre_setup(gf_boolean_t run_setup, char **op_errstr)
+ {
+ int ret = 0;
+-
+- if (check_host_list()) {
+- ret = setup_cluster(run_setup);
+- if (ret == -1)
++ if (run_setup) {
++ if (!check_host_list()) {
+ gf_asprintf(op_errstr,
+- "Failed to set up HA "
+- "config for NFS-Ganesha. "
+- "Please check the log file for details");
+- } else
+- ret = -1;
+-
++ "Running nfs-ganesha setup command "
++ "from node which is not part of ganesha cluster");
++ return -1;
++ }
++ }
++ ret = setup_cluster(run_setup);
++ if (ret == -1)
++ gf_asprintf(op_errstr,
++ "Failed to set up HA "
++ "config for NFS-Ganesha. "
++ "Please check the log file for details");
+ return ret;
+ }
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch b/SOURCES/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch
new file mode 100644
index 0000000..0a4110f
--- /dev/null
+++ b/SOURCES/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch
@@ -0,0 +1,50 @@
+From 3cb9ed7e20f59eec036908eed4cfdbc61e990ee2 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY"
+Date: Tue, 11 Dec 2018 10:09:42 -0500
+Subject: [PATCH 081/124] ganesha-ha: ensure pacemaker is enabled after setup
+
+There appears to be a race between `pcs cluster setup ...` early
+in the setup and the `systemctl enable pacemaker` at the end. The
+`pcs cluster setup ...` disables pacemaker and corosync. (Now, in
+pacemaker-1.1.18. Was it always the case?)
+
+I am not able to reproduce this on my devel system. I speculate that
+on a busy system the `pcs cluster setup ...` disable may, under
+the right conditions, not run until after the setup script enables
+it. It must require the right alignment of the Sun, Moon, and all
+the planets.
+
+Regardless, we'll use the --enable option to `pcs cluster setup ...`
+to ensure that the cluster (re)starts pacemaker.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I771ff62c37426438b80e61651a8b4ecaf2d549c3
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167166
+Reviewed-by: Soumya Koduri
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 4a98f32..32af1ca 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -195,9 +195,9 @@ setup_cluster()
+
+ pcs cluster auth ${servers}
+ # pcs cluster setup --name ${name} ${servers}
+- pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers}
++ pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers}
+ if [ $? -ne 0 ]; then
+- logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
++ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} failed"
+ #set up failed stop all ganesha process and clean up symlinks in cluster
+ stop_ganesha_all "${servers}"
+ exit 1;
+--
+1.8.3.1
+
diff --git a/SOURCES/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch b/SOURCES/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch
new file mode 100644
index 0000000..6df51eb
--- /dev/null
+++ b/SOURCES/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch
@@ -0,0 +1,59 @@
+From 6d6841a996a52488e8a18606f386bba0a12b4231 Mon Sep 17 00:00:00 2001
+From: Soumya Koduri
+Date: Fri, 18 Nov 2016 12:47:06 +0530
+Subject: [PATCH 082/124] build: Add dependency on netstat for
+ glusterfs-ganesha pkg
+
+portblock resource-agent needs netstat command but this dependency
+should have been ideally added to resource-agents package. But the
+fixes (bug1395594, bug1395596) are going to be available only
+in the future RHEL 6.9 and RHEL 7.4 releases. Hence as an interim
+workaround, we agreed to add this dependency for glusterfs-ganesha package.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I6ac1003103755d7534dd079c821bbaacd8dd94b8
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167167
+Reviewed-by: Soumya Koduri
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ glusterfs.spec.in | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index e55e255..bc27058 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -466,6 +466,12 @@ Requires: pcs, dbus
+ Requires: cman, pacemaker, corosync
+ %endif
+
++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 5 )
++# we need portblock resource-agent in 3.9.5 and later.
++Requires: resource-agents >= 3.9.5
++Requires: net-tools
++%endif
++
+ %if ( 0%{?fedora} && 0%{?fedora} > 25 )
+ Requires: selinux-policy >= 3.13.1-160
+ Requires(post): policycoreutils-python-utils
+ Requires(postun): policycoreutils-python-utils
+@@ -1951,6 +1957,14 @@ fi
+ %endif
+
+ %changelog
++* Sun Apr 7 2019 Soumya Koduri
++- As an interim fix add dependency on netstat(/net-tools) for glusterfs-ganesha package (#1395574)
++
++* Sun Apr 7 2019 Soumya Koduri
++- Add dependency on portblock resource agent for ganesha package (#1278336)
++- Fix incorrect Requires for portblock resource agent (#1278336)
++- Update version checks for portblock resource agent on RHEL (#1278336)
++
+ * Sat Apr 6 2019 Jiffin Tony Thottan
+ - Adding ganesha ha resources back in gluster repository
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch b/SOURCES/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
new file mode 100644
index 0000000..2d12285
--- /dev/null
+++ b/SOURCES/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
@@ -0,0 +1,82 @@
+From a80743a3053798521ae4dd830adcde8bc7da11b6 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan
+Date: Tue, 20 Feb 2018 11:50:33 +0530
+Subject: [PATCH 083/124] common-ha: enable and disable selinux
+ ganesha_use_fusefs
+
+Adding missing changes in a downstream backport (https://code.engineering.redhat.com/gerrit/#/c/109845/)
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I59fd2fc2228ded9547c2d1e08c22f7a10c35f86f
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167244
+Reviewed-by: Soumya Koduri
+Reviewed-by: Kaleb Keithley
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ glusterfs.spec.in | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index bc27058..2149f86 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -472,11 +472,16 @@ Requires: resource-agents >= 3.9.5
+ Requires: net-tools
+ %endif
+
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
++%if ( 0%{?rhel} )
+ Requires: selinux-policy >= 3.13.1-160
++Requires(post): policycoreutils-python
++Requires(postun): policycoreutils-python
++%else
+ Requires(post): policycoreutils-python-utils
+ Requires(postun): policycoreutils-python-utils
+ %endif
++%endif
+
+ %description ganesha
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -936,7 +941,7 @@ exit 0
+ %endif
+
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %post ganesha
+ semanage boolean -m ganesha_use_fusefs --on
+ exit 0
+@@ -1076,7 +1081,7 @@ exit 0
+ %endif
+
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %postun ganesha
+ semanage boolean -m ganesha_use_fusefs --off
+ exit 0
+@@ -1087,7 +1092,7 @@ exit 0
+ ## All %%trigger should be placed here and keep them sorted
+ ##
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %trigger ganesha -- selinux-policy-targeted
+ semanage boolean -m ganesha_use_fusefs --on
+ exit 0
+@@ -1098,7 +1103,7 @@ exit 0
+ ## All %%triggerun should be placed here and keep them sorted
+ ##
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %triggerun ganesha -- selinux-policy-targeted
+ semanage boolean -m ganesha_use_fusefs --off
+ exit 0
+--
+1.8.3.1
+
diff --git a/SOURCES/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch b/SOURCES/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch
new file mode 100644
index 0000000..04f8013
--- /dev/null
+++ b/SOURCES/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch
@@ -0,0 +1,37 @@
+From d7bee4a4ad0878003e19711e20994c42c4d2bd9e Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee
+Date: Tue, 9 Apr 2019 16:15:09 +0530
+Subject: [PATCH 084/124] glusterd: Fix duplicate client_op_version in info
+ file
+
+This must have been introduced while applying downstream only patches at
+RHGS 3.5.0 branch.
+
+Change-Id: I231249cca2a7bce29ef53cf95f9d2377b8203283
+Signed-off-by: Atin Mukherjee
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167341
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index fb52957..351bd9e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1022,11 +1022,6 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+ goto out;
+ }
+
+- snprintf(buf, sizeof(buf), "%d", volinfo->client_op_version);
+- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+- buf);
+- if (ret)
+- goto out;
+ if (volinfo->caps) {
+ snprintf(buf, sizeof(buf), "%d", volinfo->caps);
+ ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CAPS, buf);
+--
+1.8.3.1
+
diff --git a/SOURCES/0085-Revert-all-remove-code-which-is-not-being-considered.patch b/SOURCES/0085-Revert-all-remove-code-which-is-not-being-considered.patch
new file mode 100644
index 0000000..6addaff
--- /dev/null
+++ b/SOURCES/0085-Revert-all-remove-code-which-is-not-being-considered.patch
@@ -0,0 +1,8976 @@
+From 379b9f7247a4daac9545e3dec79d3c2660111d8d Mon Sep 17 00:00:00 2001
+From: Hari Gowtham
+Date: Mon, 8 Apr 2019 11:32:09 +0530
+Subject: [PATCH 085/124] Revert "all: remove code which is not being
+ considered in build"
+
+This reverts most of commit 8293d21280fd6ddfc9bb54068cf87794fc6be207.
+It adds back the changes for tier and CTR with the necessary changes for building it.
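+
+With this revert in place, tiering support is toggled at configure time;
+a build without sqlite/gfdb support would use (illustrative invocation):
+
+    ./configure --disable-tiering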
+ +Label: DOWNSTREAM ONLY + +Change-Id: I8f7978618f2a6a949b09dbcfd25722494cb8f1cd +Signed-off-by: Hari Gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/166245 +Reviewed-by: Nithya Balachandran +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + Makefile.am | 8 +- + configure.ac | 34 + + glusterfs.spec.in | 19 + + libglusterfs/Makefile.am | 4 +- + libglusterfs/src/glusterfs/mem-types.h | 1 + + xlators/cluster/dht/src/Makefile.am | 14 +- + xlators/cluster/dht/src/dht-rebalance.c | 12 + + xlators/cluster/dht/src/tier-common.c | 1199 ++++++++ + xlators/cluster/dht/src/tier-common.h | 55 + + xlators/cluster/dht/src/tier.c | 3105 ++++++++++++++++++++ + xlators/cluster/dht/src/tier.h | 110 + + xlators/features/Makefile.am | 2 +- + xlators/features/changetimerecorder/Makefile.am | 3 + + .../features/changetimerecorder/src/Makefile.am | 26 + + .../changetimerecorder/src/changetimerecorder.c | 2371 +++++++++++++++ + .../changetimerecorder/src/changetimerecorder.h | 21 + + .../features/changetimerecorder/src/ctr-helper.c | 293 ++ + .../features/changetimerecorder/src/ctr-helper.h | 854 ++++++ + .../features/changetimerecorder/src/ctr-messages.h | 61 + + .../changetimerecorder/src/ctr-xlator-ctx.c | 362 +++ + .../changetimerecorder/src/ctr-xlator-ctx.h | 68 + + .../changetimerecorder/src/ctr_mem_types.h | 22 + + 22 files changed, 8637 insertions(+), 7 deletions(-) + create mode 100644 xlators/cluster/dht/src/tier-common.c + create mode 100644 xlators/cluster/dht/src/tier-common.h + create mode 100644 xlators/cluster/dht/src/tier.c + create mode 100644 xlators/cluster/dht/src/tier.h + create mode 100644 xlators/features/changetimerecorder/Makefile.am + create mode 100644 xlators/features/changetimerecorder/src/Makefile.am + create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.c + create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.h + create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.c + create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.h + create mode 100644 xlators/features/changetimerecorder/src/ctr-messages.h + create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.c + create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.h + create mode 100644 xlators/features/changetimerecorder/src/ctr_mem_types.h + +diff --git a/Makefile.am b/Makefile.am +index e0c795f..613382f 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -3,7 +3,7 @@ SOURCES = site.h + EXTRA_DIST = autogen.sh \ + COPYING-GPLV2 COPYING-LGPLV3 COMMITMENT \ + INSTALL README.md AUTHORS THANKS NEWS \ +- glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \ ++ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in libgfdb.pc.in \ + run-tests.sh \ + build-aux/pkg-version \ + contrib/umountd \ +@@ -15,8 +15,12 @@ SUBDIRS = $(ARGP_STANDALONE_DIR) rpc/xdr/gen libglusterfs rpc api xlators \ + + pkgconfigdir = @pkgconfigdir@ + pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc ++if USE_GFDB ++pkgconfig_DATA += libgfdb.pc ++endif + +-CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile ++CLEANFILES = glusterfs-api.pc libgfchangelog.pc libgfdb.pc \ ++ contrib/umountd/Makefile + + gitclean: distclean + find . 
-name Makefile.in -exec rm -f {} \; +diff --git a/configure.ac b/configure.ac +index baa811a..633e850 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -30,6 +30,7 @@ AC_CONFIG_HEADERS([config.h site.h]) + AC_CONFIG_FILES([Makefile + libglusterfs/Makefile + libglusterfs/src/Makefile ++ libglusterfs/src/gfdb/Makefile + geo-replication/src/peer_gsec_create + geo-replication/src/peer_mountbroker + geo-replication/src/peer_mountbroker.py +@@ -121,6 +122,8 @@ AC_CONFIG_FILES([Makefile + xlators/features/changelog/src/Makefile + xlators/features/changelog/lib/Makefile + xlators/features/changelog/lib/src/Makefile ++ xlators/features/changetimerecorder/Makefile ++ xlators/features/changetimerecorder/src/Makefile + xlators/features/locks/Makefile + xlators/features/locks/src/Makefile + xlators/features/quota/Makefile +@@ -237,6 +240,7 @@ AC_CONFIG_FILES([Makefile + contrib/umountd/Makefile + glusterfs-api.pc + libgfchangelog.pc ++ libgfdb.pc + api/Makefile + api/src/Makefile + api/examples/Makefile +@@ -866,6 +870,33 @@ AM_CONDITIONAL([USE_FIREWALLD],test ["x${BUILD_FIREWALLD}" = "xyes"]) + + #endof firewald section + ++# Data tiering requires sqlite ++AC_ARG_ENABLE([tiering], ++ AC_HELP_STRING([--disable-tiering], ++ [Disable data classification/tiering]), ++ [BUILD_GFDB="${enableval}"], [BUILD_GFDB="yes"]) ++ ++case $host_os in ++ darwin*) ++ SQLITE_LIBS="-lsqlite3" ++ AC_CHECK_HEADERS([sqlite3.h], AC_DEFINE(USE_GFDB, 1)) ++ ;; ++ *) ++ if test "x${BUILD_GFDB}" = "xyes"; then ++ PKG_CHECK_MODULES([SQLITE], [sqlite3], ++ AC_DEFINE(USE_GFDB, 1), ++ AC_MSG_ERROR([pass --disable-tiering to build without sqlite])) ++ else ++ AC_DEFINE(USE_GFDB, 0, [no sqlite, gfdb is disabled]) ++ fi ++ ;; ++esac ++ ++AC_SUBST(SQLITE_CFLAGS) ++AC_SUBST(SQLITE_LIBS) ++AM_CONDITIONAL(BUILD_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes") ++AM_CONDITIONAL(USE_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes") ++ + # xml-output + AC_ARG_ENABLE([xml-output], + AC_HELP_STRING([--disable-xml-output], +@@ -1544,6 +1575,8 @@ GFAPI_VERSION="7."${PACKAGE_VERSION} + LIBGFCHANGELOG_VERSION="0.0.1" + AC_SUBST(GFAPI_VERSION) + AC_SUBST(LIBGFCHANGELOG_VERSION) ++LIBGFDB_VERSION="0.0.1" ++AC_SUBST(LIBGFDB_VERSION) + + dnl libtool versioning + LIBGFXDR_LT_VERSION="0:1:0" +@@ -1584,6 +1617,7 @@ echo "XML output : $BUILD_XML_OUTPUT" + echo "Unit Tests : $BUILD_UNITTEST" + echo "Track priv ports : $TRACK_PRIVPORTS" + echo "POSIX ACLs : $BUILD_POSIX_ACLS" ++echo "Data Classification : $BUILD_GFDB" + echo "firewalld-config : $BUILD_FIREWALLD" + echo "Events : $BUILD_EVENTS" + echo "EC dynamic support : $EC_DYNAMIC_SUPPORT" +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 2149f86..e0607ba 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -154,6 +154,7 @@ + %global _without_events --disable-events + %global _without_georeplication --disable-georeplication + %global _with_gnfs %{nil} ++%global _without_tiering --disable-tiering + %global _without_ocf --without-ocf + %endif + +@@ -287,6 +288,9 @@ BuildRequires: libuuid-devel + %if ( 0%{?_with_cmocka:1} ) + BuildRequires: libcmocka-devel >= 1.0.1 + %endif ++%if ( 0%{!?_without_tiering:1} ) ++BuildRequires: sqlite-devel ++%endif + %if ( 0%{!?_without_georeplication:1} ) + BuildRequires: libattr-devel + %endif +@@ -797,6 +801,7 @@ export LDFLAGS + %{?_without_rdma} \ + %{?_without_server} \ + %{?_without_syslog} \ ++ %{?_without_tiering} \ + %{?_with_ipv6default} \ + %{?_without_libtirpc} + +@@ -1232,9 +1237,15 @@ exit 0 + %if ( 
0%{?_without_server:1} ) + %exclude %{_libdir}/pkgconfig/libgfchangelog.pc + %exclude %{_libdir}/libgfchangelog.so ++%if ( 0%{!?_without_tiering:1} ) ++%{_libdir}/pkgconfig/libgfdb.pc ++%endif + %else + %{_libdir}/pkgconfig/libgfchangelog.pc + %{_libdir}/libgfchangelog.so ++%if ( 0%{!?_without_tiering:1} ) ++%{_libdir}/pkgconfig/libgfdb.pc ++%endif + %endif + + %files client-xlators +@@ -1330,6 +1341,10 @@ exit 0 + %files libs + %{_libdir}/*.so.* + %exclude %{_libdir}/libgfapi.* ++%if ( 0%{!?_without_tiering:1} ) ++# libgfdb is only needed server-side ++%exclude %{_libdir}/libgfdb.* ++%endif + + %files -n python%{_pythonver}-gluster + # introducing glusterfs module in site packages. +@@ -1417,6 +1432,10 @@ exit 0 + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so ++%if ( 0%{!?_without_tiering:1} ) ++ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so ++ %{_libdir}/libgfdb.so.* ++%endif + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix* +diff --git a/libglusterfs/Makefile.am b/libglusterfs/Makefile.am +index d471a3f..7e72f61 100644 +--- a/libglusterfs/Makefile.am ++++ b/libglusterfs/Makefile.am +@@ -1,3 +1,3 @@ +-SUBDIRS = src ++SUBDIRS = src src/gfdb + +-CLEANFILES = ++CLEANFILES = +diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h +index 832f68c..92730a9 100644 +--- a/libglusterfs/src/glusterfs/mem-types.h ++++ b/libglusterfs/src/glusterfs/mem-types.h +@@ -138,6 +138,7 @@ enum gf_common_mem_types_ { + gf_common_volfile_t, + gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */ + gf_common_mt_server_cmdline_t, /* used only in one location */ ++ gf_mt_gfdb_query_record_t, + gf_common_mt_end + }; + #endif +diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am +index 56f1f2a..5532047 100644 +--- a/xlators/cluster/dht/src/Makefile.am ++++ b/xlators/cluster/dht/src/Makefile.am +@@ -1,4 +1,7 @@ + xlator_LTLIBRARIES = dht.la nufa.la switch.la ++if BUILD_GFDB ++ xlator_LTLIBRARIES += tier.la ++endif + + AM_CFLAGS = -Wall $(GF_CFLAGS) + +@@ -13,6 +16,7 @@ dht_la_SOURCES = $(dht_common_source) dht.c + + nufa_la_SOURCES = $(dht_common_source) nufa.c + switch_la_SOURCES = $(dht_common_source) switch.c ++tier_la_SOURCES = $(dht_common_source) tier.c tier-common.c + + dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +@@ -23,15 +27,21 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + ++tier_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL) ++tier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la ++ + noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \ +- dht-lock.h $(top_builddir)/xlators/lib/src/libxlator.h ++ dht-lock.h tier-common.h tier.h \ ++ $(top_builddir)/xlators/lib/src/libxlator.h + + AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ ++ -I$(top_srcdir)/libglusterfs/src/gfdb \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src \ + 
-I$(top_srcdir)/xlators/lib/src \ + -DDATADIR=\"$(localstatedir)\" \ +- -DLIBDIR=\"$(libdir)\" ++ -DLIBDIR=\"$(libdir)\" \ ++ -DLIBGFDB_VERSION=\"$(LIBGFDB_VERSION)\" + + CLEANFILES = + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index e0f25b1..efbe8a4 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -8,6 +8,7 @@ + cases as published by the Free Software Foundation. + */ + ++#include "tier.h" + #include "dht-common.h" + #include + #include +@@ -2134,6 +2135,17 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + } + } + ++ /* store size of previous migrated file */ ++ if (defrag && defrag->tier_conf.is_tier) { ++ if (from != TIER_HASHED_SUBVOL) { ++ defrag->tier_conf.st_last_promoted_size = stbuf.ia_size; ++ } else { ++ /* Don't delete the linkto file on the hashed subvol */ ++ delete_src_linkto = _gf_false; ++ defrag->tier_conf.st_last_demoted_size = stbuf.ia_size; ++ } ++ } ++ + /* The src file is being unlinked after this so we don't need + to clean it up */ + clean_src = _gf_false; +diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c +new file mode 100644 +index 0000000..b22f477 +--- /dev/null ++++ b/xlators/cluster/dht/src/tier-common.c +@@ -0,0 +1,1199 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include ++#include ++#include "libxlator.h" ++#include "dht-common.h" ++#include ++#include "tier-common.h" ++#include "tier.h" ++ ++int ++dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata); ++ ++int ++tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ loc_t *oldloc = NULL; ++ loc_t *newloc = NULL; ++ ++ local = frame->local; ++ ++ oldloc = &local->loc; ++ newloc = &local->loc2; ++ ++ if (op_ret == -1) { ++ /* No continuation on DHT inode missing errors, as we should ++ * then have a good stbuf that states P2 happened. 
We would ++ * get inode missing if, the file completed migrated between ++ * the lookup and the link call */ ++ goto out; ++ } ++ ++ if (local->call_cnt != 1) { ++ goto out; ++ } ++ ++ local->call_cnt = 2; ++ ++ /* Do this on the hot tier now */ ++ ++ STACK_WIND(frame, tier_link_cbk, local->cached_subvol, ++ local->cached_subvol->fops->link, oldloc, newloc, xdata); ++ ++ return 0; ++ ++out: ++ DHT_STRIP_PHASE1_FLAGS(stbuf); ++ ++ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, ++ postparent, NULL); ++ ++ return 0; ++} ++ ++int ++tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata) ++{ ++ xlator_t *cached_subvol = NULL; ++ xlator_t *hashed_subvol = NULL; ++ int op_errno = -1; ++ int ret = -1; ++ dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(oldloc, err); ++ VALIDATE_OR_GOTO(newloc, err); ++ ++ conf = this->private; ++ ++ local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK); ++ if (!local) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ local->call_cnt = 1; ++ ++ cached_subvol = local->cached_subvol; ++ ++ if (!cached_subvol) { ++ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", ++ oldloc->path); ++ op_errno = ENOENT; ++ goto err; ++ } ++ ++ hashed_subvol = TIER_HASHED_SUBVOL; ++ ++ ret = loc_copy(&local->loc2, newloc); ++ if (ret == -1) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ if (hashed_subvol == cached_subvol) { ++ STACK_WIND(frame, dht_link_cbk, cached_subvol, ++ cached_subvol->fops->link, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ /* Create hardlinks to both the data file on the hot tier ++ and the linkto file on the cold tier */ ++ ++ gf_uuid_copy(local->gfid, oldloc->inode->gfid); ++ ++ STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link, ++ oldloc, newloc, xdata); ++ ++ return 0; ++err: ++ op_errno = (op_errno == -1) ? 
errno : op_errno; ++ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); ++ return 0; ++} ++ ++int ++tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int op_ret, int op_errno, ++ struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->params) { ++ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); ++ } ++ ++ DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ ++ return 0; ++} ++ ++int ++tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ xlator_t *prev = NULL; ++ int ret = -1; ++ dht_local_t *local = NULL; ++ xlator_t *hashed_subvol = NULL; ++ dht_conf_t *conf = NULL; ++ ++ local = frame->local; ++ conf = this->private; ++ ++ hashed_subvol = TIER_HASHED_SUBVOL; ++ ++ if (!local) { ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ if (op_ret == -1) { ++ if (local->linked == _gf_true && local->xattr_req) { ++ local->op_errno = op_errno; ++ local->op_ret = op_ret; ++ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file( ++ local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value to " ++ "unlink of migrating file"); ++ goto out; ++ } ++ ++ STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk, ++ hashed_subvol, hashed_subvol->fops->unlink, &local->loc, ++ 0, local->xattr_req); ++ return 0; ++ } ++ goto out; ++ } ++ ++ prev = cookie; ++ ++ if (local->loc.parent) { ++ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0); ++ ++ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); ++ } ++ ++ ret = dht_layout_preset(this, prev, inode); ++ if (ret != 0) { ++ gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s", ++ prev->name); ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ local->op_errno = op_errno; ++ ++ if (local->linked == _gf_true) { ++ local->stbuf = *stbuf; ++ dht_linkfile_attr_heal(frame, this); ++ } ++out: ++ if (local) { ++ if (local->xattr_req) { ++ dict_del(local->xattr_req, TIER_LINKFILE_GFID); ++ } ++ } ++ ++ DHT_STRIP_PHASE1_FLAGS(stbuf); ++ ++ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ ++ return 0; ++} ++ ++int ++tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *cached_subvol = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = -1; ++ unsigned char *gfid = NULL; ++ ++ local = frame->local; ++ if (!local) { ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ if (op_ret == -1) { ++ local->op_errno = op_errno; ++ goto err; ++ } ++ ++ conf = this->private; ++ if (!conf) { ++ local->op_errno = EINVAL; ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ cached_subvol = TIER_UNHASHED_SUBVOL; ++ ++ if (local->params) { ++ dict_del(local->params, conf->link_xattr_name); ++ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); ++ } ++ ++ /* ++ * We will delete the linkfile if data file creation fails. 
++ * When deleting this stale linkfile, there is a possibility ++ * for a race between this linkfile deletion and a stale ++ * linkfile deletion triggered by another lookup from different ++ * client. ++ * ++ * For eg: ++ * ++ * Client 1 Client 2 ++ * ++ * 1 linkfile created for foo ++ * ++ * 2 data file creation failed ++ * ++ * 3 creating a file with same name ++ * ++ * 4 lookup before creation deleted ++ * the linkfile created by client1 ++ * considering as a stale linkfile. ++ * ++ * 5 New linkfile created for foo ++ * with different gfid. ++ * ++ * 6 Trigger linkfile deletion as ++ * data file creation failed. ++ * ++ * 7 Linkfile deleted which is ++ * created by client2. ++ * ++ * 8 Data file created. ++ * ++ * With this race, we will end up having a file in a non-hashed subvol ++ * without a linkfile in hashed subvol. ++ * ++ * To avoid this, we store the gfid of linkfile created by client, So ++ * If we delete the linkfile , we validate gfid of existing file with ++ * stored value from posix layer. ++ * ++ * Storing this value in local->xattr_req as local->params was also used ++ * to create the data file. During the linkfile deletion we will use ++ * local->xattr_req dictionary. ++ */ ++ if (!local->xattr_req) { ++ local->xattr_req = dict_new(); ++ if (!local->xattr_req) { ++ local->op_errno = ENOMEM; ++ op_errno = ENOMEM; ++ goto err; ++ } ++ } ++ ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); ++ if (!gfid) { ++ local->op_errno = ENOMEM; ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ gf_uuid_copy(gfid, stbuf->ia_gfid); ++ ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid, ++ sizeof(uuid_t)); ++ if (ret) { ++ GF_FREE(gfid); ++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value" ++ " : key = %s", ++ TIER_LINKFILE_GFID); ++ } ++ ++ STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol, ++ cached_subvol->fops->create, &local->loc, local->flags, ++ local->mode, local->umask, local->fd, local->params); ++ ++ return 0; ++err: ++ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, ++ NULL); ++ return 0; ++} ++ ++gf_boolean_t ++tier_is_hot_tier_decommissioned(xlator_t *this) ++{ ++ dht_conf_t *conf = NULL; ++ xlator_t *hot_tier = NULL; ++ int i = 0; ++ ++ conf = this->private; ++ hot_tier = conf->subvolumes[1]; ++ ++ if (conf->decommission_subvols_cnt) { ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (conf->decommissioned_bricks[i] && ++ conf->decommissioned_bricks[i] == hot_tier) ++ return _gf_true; ++ } ++ } ++ ++ return _gf_false; ++} ++ ++int ++tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *params) ++{ ++ int op_errno = -1; ++ dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; ++ xlator_t *hot_subvol = NULL; ++ xlator_t *cold_subvol = NULL; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(loc, err); ++ ++ conf = this->private; ++ ++ dht_get_du_info(frame, this, loc); ++ ++ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE); ++ if (!local) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ cold_subvol = TIER_HASHED_SUBVOL; ++ hot_subvol = TIER_UNHASHED_SUBVOL; ++ ++ if (conf->subvolumes[0] != cold_subvol) { ++ hot_subvol = conf->subvolumes[0]; ++ } ++ /* ++ * if hot tier full, write to cold. 
++ * Also if hot tier is full, create in cold ++ */ ++ if (dht_is_subvol_filled(this, hot_subvol) || ++ tier_is_hot_tier_decommissioned(this)) { ++ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, ++ cold_subvol->name); ++ ++ STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol, ++ cold_subvol->fops->create, loc, flags, mode, umask, ++ fd, params); ++ } else { ++ local->params = dict_ref(params); ++ local->flags = flags; ++ local->mode = mode; ++ local->umask = umask; ++ local->cached_subvol = hot_subvol; ++ local->hashed_subvol = cold_subvol; ++ ++ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path, ++ hot_subvol->name, cold_subvol->name); ++ ++ dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this, ++ hot_subvol, cold_subvol, loc); ++ ++ goto out; ++ } ++out: ++ return 0; ++ ++err: ++ ++ op_errno = (op_errno == -1) ? errno : op_errno; ++ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, ++ NULL); ++ ++ return 0; ++} ++ ++int ++tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int op_ret, int op_errno, ++ struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ ++ LOCK(&frame->lock); ++ { ++ if ((op_ret == -1) && (op_errno != ENOENT)) { ++ local->op_errno = op_errno; ++ local->op_ret = op_ret; ++ gf_msg_debug(this->name, op_errno, ++ "Unlink link: subvolume %s" ++ " returned -1", ++ prev->name); ++ goto unlock; ++ } ++ ++ local->op_ret = 0; ++ } ++unlock: ++ UNLOCK(&frame->lock); ++ ++ if (local->op_ret == -1) ++ goto err; ++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, NULL); ++ ++ return 0; ++ ++err: ++ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); ++ return 0; ++} ++ ++int ++tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, inode_t *inode, ++ struct iatt *preparent, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = NULL; ++ dht_conf_t *conf = NULL; ++ xlator_t *hot_subvol = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ conf = this->private; ++ hot_subvol = TIER_UNHASHED_SUBVOL; ++ ++ if (!op_ret) { ++ /* ++ * linkfile present on hot tier. 
unlinking the linkfile ++ */ ++ STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol, ++ hot_subvol, hot_subvol->fops->unlink, &local->loc, ++ local->flags, NULL); ++ return 0; ++ } ++ ++ LOCK(&frame->lock); ++ { ++ if (op_errno == ENOENT) { ++ local->op_ret = 0; ++ local->op_errno = op_errno; ++ } else { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1", ++ prev->name); ++ } ++ ++ UNLOCK(&frame->lock); ++ ++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, xdata); ++ ++ return 0; ++} ++ ++int ++tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ ++ LOCK(&frame->lock); ++ { ++ /* Ignore EINVAL for tier to ignore error when the file ++ does not exist on the other tier */ ++ if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) { ++ local->op_errno = op_errno; ++ local->op_ret = op_ret; ++ gf_msg_debug(this->name, op_errno, ++ "Unlink link: subvolume %s" ++ " returned -1", ++ prev->name); ++ goto unlock; ++ } ++ ++ local->op_ret = 0; ++ } ++unlock: ++ UNLOCK(&frame->lock); ++ ++ if (local->op_ret == -1) ++ goto err; ++ ++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, xdata); ++ ++ return 0; ++ ++err: ++ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); ++ return 0; ++} ++ ++int32_t ++tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = NULL; ++ struct iatt *stbuf = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = -1; ++ xlator_t *hot_tier = NULL; ++ xlator_t *cold_tier = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ conf = this->private; ++ ++ cold_tier = TIER_HASHED_SUBVOL; ++ hot_tier = TIER_UNHASHED_SUBVOL; ++ ++ LOCK(&frame->lock); ++ { ++ if (op_ret == -1) { ++ if (op_errno == ENOENT) { ++ local->op_ret = 0; ++ } else { ++ local->op_ret = -1; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, op_errno, ++ "Unlink: subvolume %s returned -1" ++ " with errno = %d", ++ prev->name, op_errno); ++ goto unlock; ++ } ++ ++ local->op_ret = 0; ++ ++ local->postparent = *postparent; ++ local->preparent = *preparent; ++ ++ if (local->loc.parent) { ++ dht_inode_ctx_time_update(local->loc.parent, this, ++ &local->preparent, 0); ++ dht_inode_ctx_time_update(local->loc.parent, this, ++ &local->postparent, 1); ++ } ++ } ++unlock: ++ UNLOCK(&frame->lock); ++ ++ if (local->op_ret) ++ goto out; ++ ++ if (cold_tier != local->cached_subvol) { ++ /* ++ * File is present in hot tier, so there will be ++ * a link file on cold tier, deleting the linkfile ++ * from cold tier ++ */ ++ STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier, ++ cold_tier->fops->unlink, &local->loc, local->flags, ++ xdata); ++ return 0; ++ } ++ ++ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); ++ if (!ret && stbuf && ++ ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) { ++ /* ++ * File is migrating from cold to hot tier. ++ * Delete the destination linkfile. 
++ */ ++ STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier, ++ hot_tier->fops->lookup, &local->loc, NULL); ++ return 0; ++ } ++ ++out: ++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, xdata); ++ ++ return 0; ++} ++ ++int ++tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) ++{ ++ xlator_t *cached_subvol = NULL; ++ xlator_t *hashed_subvol = NULL; ++ dht_conf_t *conf = NULL; ++ int op_errno = -1; ++ dht_local_t *local = NULL; ++ int ret = -1; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(loc, err); ++ ++ conf = this->private; ++ ++ local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK); ++ if (!local) { ++ op_errno = ENOMEM; ++ ++ goto err; ++ } ++ ++ hashed_subvol = TIER_HASHED_SUBVOL; ++ ++ cached_subvol = local->cached_subvol; ++ if (!cached_subvol) { ++ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", ++ loc->path); ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ local->flags = xflag; ++ if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) { ++ /* ++ * File resides in cold tier. We need to stat ++ * the file to see if it is being promoted. ++ * If yes we need to delete the destination ++ * file as well. ++ * ++ * Currently we are doing this check only for ++ * regular files. ++ */ ++ xdata = xdata ? dict_ref(xdata) : dict_new(); ++ if (xdata) { ++ ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "Failed to set dictionary key %s", ++ DHT_IATT_IN_XDATA_KEY); ++ } ++ } ++ } ++ ++ /* ++ * File is on hot tier, delete the data file first, then ++ * linkfile from cold. ++ */ ++ STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol, ++ cached_subvol->fops->unlink, loc, xflag, xdata); ++ if (xdata) ++ dict_unref(xdata); ++ return 0; ++err: ++ op_errno = (op_errno == -1) ? 
errno : op_errno; ++ DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); ++ ++ return 0; ++} ++ ++int ++tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) ++{ ++ gf_dirent_t entries; ++ gf_dirent_t *orig_entry = NULL; ++ gf_dirent_t *entry = NULL; ++ int count = 0; ++ ++ INIT_LIST_HEAD(&entries.list); ++ ++ if (op_ret < 0) ++ goto unwind; ++ ++ list_for_each_entry(orig_entry, (&orig_entries->list), list) ++ { ++ entry = gf_dirent_for_name(orig_entry->d_name); ++ if (!entry) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, ++ "Memory allocation failed "); ++ goto unwind; ++ } ++ ++ entry->d_off = orig_entry->d_off; ++ entry->d_ino = orig_entry->d_ino; ++ entry->d_type = orig_entry->d_type; ++ entry->d_len = orig_entry->d_len; ++ ++ list_add_tail(&entry->list, &entries.list); ++ count++; ++ } ++ op_ret = count; ++ ++unwind: ++ if (op_ret < 0) ++ op_ret = 0; ++ ++ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL); ++ ++ gf_dirent_free(&entries); ++ ++ return 0; ++} ++ ++int ++tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ gf_dirent_t entries; ++ gf_dirent_t *orig_entry = NULL; ++ gf_dirent_t *entry = NULL; ++ xlator_t *prev = NULL; ++ xlator_t *next_subvol = NULL; ++ off_t next_offset = 0; ++ int count = 0; ++ dht_conf_t *conf = NULL; ++ int ret = 0; ++ inode_table_t *itable = NULL; ++ inode_t *inode = NULL; ++ ++ INIT_LIST_HEAD(&entries.list); ++ prev = cookie; ++ local = frame->local; ++ itable = local->fd ? local->fd->inode->table : NULL; ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, unwind); ++ ++ if (op_ret < 0) ++ goto done; ++ ++ list_for_each_entry(orig_entry, (&orig_entries->list), list) ++ { ++ next_offset = orig_entry->d_off; ++ ++ if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { ++ /*stat failed somewhere- ignore this entry*/ ++ continue; ++ } ++ ++ entry = gf_dirent_for_name(orig_entry->d_name); ++ if (!entry) { ++ goto unwind; ++ } ++ ++ entry->d_off = orig_entry->d_off; ++ entry->d_stat = orig_entry->d_stat; ++ entry->d_ino = orig_entry->d_ino; ++ entry->d_type = orig_entry->d_type; ++ entry->d_len = orig_entry->d_len; ++ ++ if (orig_entry->dict) ++ entry->dict = dict_ref(orig_entry->dict); ++ ++ if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict, ++ conf->link_xattr_name)) { ++ goto entries; ++ ++ } else if (IA_ISDIR(entry->d_stat.ia_type)) { ++ if (orig_entry->inode) { ++ dht_inode_ctx_time_update(orig_entry->inode, this, ++ &entry->d_stat, 1); ++ } ++ } else { ++ if (orig_entry->inode) { ++ ret = dht_layout_preset(this, prev, orig_entry->inode); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_LAYOUT_SET_FAILED, ++ "failed to link the layout " ++ "in inode"); ++ ++ entry->inode = inode_ref(orig_entry->inode); ++ } else if (itable) { ++ /* ++ * orig_entry->inode might be null if any upper ++ * layer xlators below client set to null, to ++ * force a lookup on the inode even if the inode ++ * is present in the inode table. In that case ++ * we just update the ctx to make sure we didn't ++ * missed anything. 
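++                     * The preset below deliberately uses
++                     * TIER_HASHED_SUBVOL: readdirp is wound to the
++                     * hashed (cold) subvolume, so that is where
++                     * these entries were served from.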
++ */ ++ inode = inode_find(itable, orig_entry->d_stat.ia_gfid); ++ if (inode) { ++ ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_LAYOUT_SET_FAILED, ++ "failed to link the layout" ++ " in inode"); ++ inode_unref(inode); ++ inode = NULL; ++ } ++ } ++ } ++ ++ entries: ++ list_add_tail(&entry->list, &entries.list); ++ count++; ++ } ++ op_ret = count; ++ ++done: ++ if (count == 0) { ++ /* non-zero next_offset means that ++ EOF is not yet hit on the current subvol ++ */ ++ if (next_offset != 0) { ++ next_subvol = prev; ++ } else { ++ goto unwind; ++ } ++ ++ STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol, ++ next_subvol->fops->readdirp, local->fd, local->size, ++ next_offset, local->xattr); ++ return 0; ++ } ++ ++unwind: ++ if (op_ret < 0) ++ op_ret = 0; ++ ++ DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL); ++ ++ gf_dirent_free(&entries); ++ ++ return 0; ++} ++ ++int ++tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t yoff, int whichop, dict_t *dict) ++{ ++ dht_local_t *local = NULL; ++ int op_errno = -1; ++ xlator_t *hashed_subvol = NULL; ++ int ret = 0; ++ dht_conf_t *conf = NULL; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(fd, err); ++ VALIDATE_OR_GOTO(this->private, err); ++ ++ conf = this->private; ++ ++ local = dht_local_init(frame, NULL, NULL, whichop); ++ if (!local) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ local->fd = fd_ref(fd); ++ local->size = size; ++ local->xattr_req = (dict) ? dict_ref(dict) : NULL; ++ ++ hashed_subvol = TIER_HASHED_SUBVOL; ++ ++ /* TODO: do proper readdir */ ++ if (whichop == GF_FOP_READDIRP) { ++ if (dict) ++ local->xattr = dict_ref(dict); ++ else ++ local->xattr = dict_new(); ++ ++ if (local->xattr) { ++ ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value" ++ " : key = %s", ++ conf->link_xattr_name); ++ } ++ ++ STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol, ++ hashed_subvol, hashed_subvol->fops->readdirp, fd, ++ size, yoff, local->xattr); ++ ++ } else { ++ STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol, ++ hashed_subvol->fops->readdir, fd, size, yoff, ++ local->xattr); ++ } ++ ++ return 0; ++ ++err: ++ op_errno = (op_errno == -1) ? 
errno : op_errno; ++ DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); ++ ++ return 0; ++} ++ ++int ++tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t yoff, dict_t *xdata) ++{ ++ int op = GF_FOP_READDIR; ++ dht_conf_t *conf = NULL; ++ int i = 0; ++ ++ conf = this->private; ++ if (!conf) ++ goto out; ++ ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (!conf->subvolume_status[i]) { ++ op = GF_FOP_READDIRP; ++ break; ++ } ++ } ++ ++ if (conf->use_readdirp) ++ op = GF_FOP_READDIRP; ++ ++out: ++ tier_do_readdir(frame, this, fd, size, yoff, op, 0); ++ return 0; ++} ++ ++int ++tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t yoff, dict_t *dict) ++{ ++ tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); ++ return 0; ++} ++ ++int ++tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, struct statvfs *statvfs, dict_t *xdata) ++{ ++ gf_boolean_t event = _gf_false; ++ qdstatfs_action_t action = qdstatfs_action_OFF; ++ dht_local_t *local = NULL; ++ int this_call_cnt = 0; ++ int bsize = 0; ++ int frsize = 0; ++ GF_UNUSED int ret = 0; ++ unsigned long new_usage = 0; ++ unsigned long cur_usage = 0; ++ xlator_t *prev = NULL; ++ dht_conf_t *conf = NULL; ++ tier_statvfs_t *tier_stat = NULL; ++ ++ prev = cookie; ++ local = frame->local; ++ GF_ASSERT(local); ++ ++ conf = this->private; ++ ++ if (xdata) ++ ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event); ++ ++ tier_stat = &local->tier_statvfs; ++ ++ LOCK(&frame->lock); ++ { ++ if (op_ret == -1) { ++ local->op_errno = op_errno; ++ goto unlock; ++ } ++ if (!statvfs) { ++ op_errno = EINVAL; ++ local->op_ret = -1; ++ goto unlock; ++ } ++ local->op_ret = 0; ++ ++ if (local->quota_deem_statfs) { ++ if (event == _gf_true) { ++ action = qdstatfs_action_COMPARE; ++ } else { ++ action = qdstatfs_action_NEGLECT; ++ } ++ } else { ++ if (event == _gf_true) { ++ action = qdstatfs_action_REPLACE; ++ local->quota_deem_statfs = _gf_true; ++ } ++ } ++ ++ if (local->quota_deem_statfs) { ++ switch (action) { ++ case qdstatfs_action_NEGLECT: ++ goto unlock; ++ ++ case qdstatfs_action_REPLACE: ++ local->statvfs = *statvfs; ++ goto unlock; ++ ++ case qdstatfs_action_COMPARE: ++ new_usage = statvfs->f_blocks - statvfs->f_bfree; ++ cur_usage = local->statvfs.f_blocks - ++ local->statvfs.f_bfree; ++ ++ /* Take the max of the usage from subvols */ ++ if (new_usage >= cur_usage) ++ local->statvfs = *statvfs; ++ goto unlock; ++ ++ default: ++ break; ++ } ++ } ++ ++ if (local->statvfs.f_bsize != 0) { ++ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); ++ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); ++ dht_normalize_stats(&local->statvfs, bsize, frsize); ++ dht_normalize_stats(statvfs, bsize, frsize); ++ } else { ++ local->statvfs.f_bsize = statvfs->f_bsize; ++ local->statvfs.f_frsize = statvfs->f_frsize; ++ } ++ ++ if (prev == TIER_HASHED_SUBVOL) { ++ local->statvfs.f_blocks = statvfs->f_blocks; ++ local->statvfs.f_files = statvfs->f_files; ++ local->statvfs.f_fsid = statvfs->f_fsid; ++ local->statvfs.f_flag = statvfs->f_flag; ++ local->statvfs.f_namemax = statvfs->f_namemax; ++ tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree); ++ tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail); ++ tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree); ++ tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail); ++ tier_stat->hashed_fsid = statvfs->f_fsid; ++ } else { ++ 
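++            /* prev is the unhashed (hot) subvolume here; its usage
++             * is stashed separately and only folded into the totals
++             * in the is_last_call() block below, and then only when
++             * the two tiers report different fsids (i.e. sit on
++             * distinct backend filesystems). */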
tier_stat->unhashed_fsid = statvfs->f_fsid; ++ tier_stat->unhashed_blocks_used = (statvfs->f_blocks - ++ statvfs->f_bfree); ++ tier_stat->unhashed_pblocks_used = (statvfs->f_blocks - ++ statvfs->f_bavail); ++ tier_stat->unhashed_files_used = (statvfs->f_files - ++ statvfs->f_ffree); ++ tier_stat->unhashed_pfiles_used = (statvfs->f_files - ++ statvfs->f_favail); ++ } ++ } ++unlock: ++ UNLOCK(&frame->lock); ++ ++ this_call_cnt = dht_frame_return(frame); ++ if (is_last_call(this_call_cnt)) { ++ if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) { ++ tier_stat->blocks_used += tier_stat->unhashed_blocks_used; ++ tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used; ++ tier_stat->files_used += tier_stat->unhashed_files_used; ++ tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used; ++ } ++ local->statvfs.f_bfree = local->statvfs.f_blocks - ++ tier_stat->blocks_used; ++ local->statvfs.f_bavail = local->statvfs.f_blocks - ++ tier_stat->pblocks_used; ++ local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used; ++ local->statvfs.f_favail = local->statvfs.f_files - ++ tier_stat->pfiles_used; ++ DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, ++ &local->statvfs, xdata); ++ } ++ ++ return 0; ++} ++ ++int ++tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; ++ int op_errno = -1; ++ int i = -1; ++ inode_t *inode = NULL; ++ inode_table_t *itable = NULL; ++ uuid_t root_gfid = { ++ 0, ++ }; ++ loc_t newloc = { ++ 0, ++ }; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(loc, err); ++ VALIDATE_OR_GOTO(this->private, err); ++ ++ conf = this->private; ++ ++ local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS); ++ if (!local) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) { ++ itable = loc->inode->table; ++ if (!itable) { ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ loc = &local->loc2; ++ root_gfid[15] = 1; ++ ++ inode = inode_find(itable, root_gfid); ++ if (!inode) { ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ dht_build_root_loc(inode, &newloc); ++ loc = &newloc; ++ } ++ ++ local->call_cnt = conf->subvolume_cnt; ++ ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i], ++ conf->subvolumes[i], ++ conf->subvolumes[i]->fops->statfs, loc, xdata); ++ } ++ ++ return 0; ++ ++err: ++ op_errno = (op_errno == -1) ? errno : op_errno; ++ DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); ++ ++ return 0; ++} +diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h +new file mode 100644 +index 0000000..b1ebaa8 +--- /dev/null ++++ b/xlators/cluster/dht/src/tier-common.h +@@ -0,0 +1,55 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++ ++#ifndef _TIER_COMMON_H_ ++#define _TIER_COMMON_H_ ++/* Function definitions */ ++int ++tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int op_ret, int op_errno, ++ struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata); ++ ++int ++tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata); ++ ++int ++tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata); ++ ++int ++tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *params); ++ ++int32_t ++tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata); ++ ++int32_t ++tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t off, dict_t *dict); ++ ++int ++tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t yoff, dict_t *xdata); ++ ++int ++tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata); ++ ++int ++tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); ++ ++#endif +diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c +new file mode 100644 +index 0000000..94b4c63 +--- /dev/null ++++ b/xlators/cluster/dht/src/tier.c +@@ -0,0 +1,3105 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++ ++#include ++ ++#include "dht-common.h" ++#include "tier.h" ++#include "tier-common.h" ++#include ++#include ++#include "tier-ctr-interface.h" ++ ++/*Hard coded DB info*/ ++static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3; ++/*Hard coded DB info*/ ++ ++/*Mutex for updating the data movement stats*/ ++static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER; ++ ++/* Stores the path location of promotion query files */ ++static char *promotion_qfile; ++/* Stores the path location of demotion query files */ ++static char *demotion_qfile; ++ ++static void *libhandle; ++static gfdb_methods_t gfdb_methods; ++ ++#define DB_QUERY_RECORD_SIZE 4096 ++ ++/* ++ * Closes all the fds and frees the qfile_array ++ * */ ++static void ++qfile_array_free(tier_qfile_array_t *qfile_array) ++{ ++ ssize_t i = 0; ++ ++ if (qfile_array) { ++ if (qfile_array->fd_array) { ++ for (i = 0; i < qfile_array->array_size; i++) { ++ if (qfile_array->fd_array[i] != -1) { ++ sys_close(qfile_array->fd_array[i]); ++ } ++ } ++ } ++ GF_FREE(qfile_array->fd_array); ++ } ++ GF_FREE(qfile_array); ++} ++ ++/* Create a new query file list with given size */ ++static tier_qfile_array_t * ++qfile_array_new(ssize_t array_size) ++{ ++ int ret = -1; ++ tier_qfile_array_t *qfile_array = NULL; ++ ssize_t i = 0; ++ ++ GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out); ++ ++ qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t), ++ gf_tier_mt_qfile_array_t); ++ if (!qfile_array) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to allocate memory for tier_qfile_array_t"); ++ goto out; ++ } ++ ++ qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int), ++ gf_dht_mt_int32_t); ++ if (!qfile_array->fd_array) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to allocate memory for " ++ "tier_qfile_array_t->fd_array"); ++ goto out; ++ } ++ ++ /* Init all the fds to -1 */ ++ for (i = 0; i < array_size; i++) { ++ qfile_array->fd_array[i] = -1; ++ } ++ ++ qfile_array->array_size = array_size; ++ qfile_array->next_index = 0; ++ ++ /* Set exhausted count to list size as the list is empty */ ++ qfile_array->exhausted_count = qfile_array->array_size; ++ ++ ret = 0; ++out: ++ if (ret) { ++ qfile_array_free(qfile_array); ++ qfile_array = NULL; ++ } ++ return qfile_array; ++} ++ ++/* Checks if the query file list is empty or totally exhausted. */ ++static gf_boolean_t ++is_qfile_array_empty(tier_qfile_array_t *qfile_array) ++{ ++ return (qfile_array->exhausted_count == qfile_array->array_size) ++ ? _gf_true ++ : _gf_false; ++} ++ ++/* Shifts the next_fd pointer to the next available fd in the list */ ++static void ++shift_next_index(tier_qfile_array_t *qfile_array) ++{ ++ int qfile_fd = 0; ++ int spin_count = 0; ++ ++ if (is_qfile_array_empty(qfile_array)) { ++ return; ++ } ++ ++ do { ++ /* change next_index in a rotional manner */ ++ (qfile_array->next_index == (qfile_array->array_size - 1)) ++ ? qfile_array->next_index = 0 ++ : qfile_array->next_index++; ++ ++ qfile_fd = (qfile_array->fd_array[qfile_array->next_index]); ++ ++ spin_count++; ++ ++ } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size)); ++} ++ ++/* ++ * This is a non-thread safe function to read query records ++ * from a list of query files in a Round-Robin manner. ++ * As in when the query files get exhuasted they are closed. ++ * Returns: ++ * 0 if all the query records in all the query files of the list are ++ * exhausted. ++ * > 0 if a query record is successfully read. 
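++ * (the files are drained round-robin, with shift_next_index()
++ * stepping past fds that have already been closed).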
Indicates the size of the query ++ * record read. ++ * < 0 if there was failure ++ * */ ++static int ++read_query_record_list(tier_qfile_array_t *qfile_array, ++ gfdb_query_record_t **query_record) ++{ ++ int ret = -1; ++ int qfile_fd = 0; ++ ++ GF_VALIDATE_OR_GOTO("tier", qfile_array, out); ++ GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out); ++ ++ do { ++ if (is_qfile_array_empty(qfile_array)) { ++ ret = 0; ++ break; ++ } ++ ++ qfile_fd = qfile_array->fd_array[qfile_array->next_index]; ++ ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record); ++ if (ret <= 0) { ++ /*The qfile_fd has reached EOF or ++ * there was an error. ++ * 1. Close the exhausted fd ++ * 2. increment the exhausted count ++ * 3. shift next_qfile to next qfile ++ **/ ++ sys_close(qfile_fd); ++ qfile_array->fd_array[qfile_array->next_index] = -1; ++ qfile_array->exhausted_count++; ++ /* shift next_qfile to next qfile */ ++ shift_next_index(qfile_array); ++ continue; ++ } else { ++ /* shift next_qfile to next qfile */ ++ shift_next_index(qfile_array); ++ break; ++ } ++ } while (1); ++out: ++ return ret; ++} ++ ++/* Check and update the watermark every WM_INTERVAL seconds */ ++#define WM_INTERVAL 5 ++#define WM_INTERVAL_EMERG 1 ++ ++static int ++tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) ++{ ++ int ret = -1; ++ dict_t *dict = NULL; ++ char *uuid_str = NULL; ++ uuid_t node_uuid = { ++ 0, ++ }; ++ ++ GF_VALIDATE_OR_GOTO("tier", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, loc, out); ++ GF_VALIDATE_OR_GOTO(this->name, defrag, out); ++ ++ if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path); ++ goto out; ++ } ++ ++ if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to get node-uuids for %s", loc->path); ++ goto out; ++ } ++ ++ if (gf_uuid_parse(uuid_str, node_uuid)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "uuid_parse failed for %s", loc->path); ++ goto out; ++ } ++ ++ if (gf_uuid_compare(node_uuid, defrag->node_uuid)) { ++ gf_msg_debug(this->name, 0, "%s does not belong to this node", ++ loc->path); ++ ret = 1; ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ if (dict) ++ dict_unref(dict); ++ ++ return ret; ++} ++ ++int ++tier_get_fs_stat(xlator_t *this, loc_t *root_loc) ++{ ++ int ret = 0; ++ gf_defrag_info_t *defrag = NULL; ++ dht_conf_t *conf = NULL; ++ dict_t *xdata = NULL; ++ struct statvfs statfs = { ++ 0, ++ }; ++ gf_tier_conf_t *tier_conf = NULL; ++ ++ conf = this->private; ++ if (!conf) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "conf is NULL"); ++ ret = -1; ++ goto exit; ++ } ++ ++ defrag = conf->defrag; ++ if (!defrag) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "defrag is NULL"); ++ ret = -1; ++ goto exit; ++ } ++ ++ tier_conf = &defrag->tier_conf; ++ ++ xdata = dict_new(); ++ if (!xdata) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, ++ "failed to allocate dictionary"); ++ ret = -1; ++ goto exit; ++ } ++ ++ ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, ++ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict"); ++ ret = -1; ++ goto exit; ++ } ++ ++ /* Find how much free space is on the hot subvolume. 
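++     * (the hot tier is assumed to be conf->subvolumes[1]
++     * throughout this file, and the cold tier subvolumes[0]).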
++ * Then see if that value */ ++ /* is less than or greater than user defined watermarks. ++ * Stash results in */ ++ /* the tier_conf data structure. */ ++ ++ ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS, ++ "Unable to obtain statfs."); ++ goto exit; ++ } ++ ++ pthread_mutex_lock(&dm_stat_mutex); ++ ++ tier_conf->block_size = statfs.f_bsize; ++ tier_conf->blocks_total = statfs.f_blocks; ++ tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree; ++ ++ tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used, ++ statfs.f_blocks); ++ pthread_mutex_unlock(&dm_stat_mutex); ++ ++exit: ++ if (xdata) ++ dict_unref(xdata); ++ return ret; ++} ++ ++static void ++tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm, ++ tier_watermark_op_t new_wm) ++{ ++ if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) { ++ if (new_wm == TIER_WM_MID) { ++ gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname); ++ } else if (new_wm == TIER_WM_HI) { ++ gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); ++ } ++ } else if (old_wm == TIER_WM_MID) { ++ if (new_wm == TIER_WM_LOW) { ++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); ++ } else if (new_wm == TIER_WM_HI) { ++ gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); ++ } ++ } else if (old_wm == TIER_WM_HI) { ++ if (new_wm == TIER_WM_MID) { ++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname); ++ } else if (new_wm == TIER_WM_LOW) { ++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); ++ } ++ } ++} ++ ++int ++tier_check_watermark(xlator_t *this) ++{ ++ int ret = -1; ++ gf_defrag_info_t *defrag = NULL; ++ dht_conf_t *conf = NULL; ++ gf_tier_conf_t *tier_conf = NULL; ++ tier_watermark_op_t wm = TIER_WM_NONE; ++ ++ conf = this->private; ++ if (!conf) ++ goto exit; ++ ++ defrag = conf->defrag; ++ if (!defrag) ++ goto exit; ++ ++ tier_conf = &defrag->tier_conf; ++ ++ if (tier_conf->percent_full < tier_conf->watermark_low) { ++ wm = TIER_WM_LOW; ++ ++ } else if (tier_conf->percent_full < tier_conf->watermark_hi) { ++ wm = TIER_WM_MID; ++ ++ } else { ++ wm = TIER_WM_HI; ++ } ++ ++ if (wm != tier_conf->watermark_last) { ++ tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last, ++ wm); ++ ++ tier_conf->watermark_last = wm; ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Tier watermark now %d", wm); ++ } ++ ++ ret = 0; ++ ++exit: ++ return ret; ++} ++ ++static gf_boolean_t ++is_hot_tier_full(gf_tier_conf_t *tier_conf) ++{ ++ if (tier_conf && (tier_conf->mode == TIER_MODE_WM) && ++ (tier_conf->watermark_last == TIER_WM_HI)) ++ return _gf_true; ++ ++ return _gf_false; ++} ++ ++int ++tier_do_migration(xlator_t *this, int promote) ++{ ++ gf_defrag_info_t *defrag = NULL; ++ dht_conf_t *conf = NULL; ++ long rand = 0; ++ int migrate = 0; ++ gf_tier_conf_t *tier_conf = NULL; ++ ++ conf = this->private; ++ if (!conf) ++ goto exit; ++ ++ defrag = conf->defrag; ++ if (!defrag) ++ goto exit; ++ ++ if (tier_check_watermark(this) != 0) { ++ gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to get watermark"); ++ goto exit; ++ } ++ ++ tier_conf = &defrag->tier_conf; ++ ++ switch (tier_conf->watermark_last) { ++ case TIER_WM_LOW: ++ migrate = promote ? 1 : 0; ++ break; ++ case TIER_WM_HI: ++ migrate = promote ? 
0 : 1; ++ break; ++ case TIER_WM_MID: ++ /* coverity[DC.WEAK_CRYPTO] */ ++ rand = random() % 100; ++ if (promote) { ++ migrate = (rand > tier_conf->percent_full); ++ } else { ++ migrate = (rand <= tier_conf->percent_full); ++ } ++ break; ++ } ++ ++exit: ++ return migrate; ++} ++ ++int ++tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc, ++ gf_tier_conf_t *tier_conf) ++{ ++ int ret = -1; ++ ++ pthread_mutex_lock(&tier_conf->pause_mutex); ++ if (is_promotion) ++ tier_conf->promote_in_progress = 1; ++ else ++ tier_conf->demote_in_progress = 1; ++ pthread_mutex_unlock(&tier_conf->pause_mutex); ++ ++ /* Data migration */ ++ ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL); ++ ++ pthread_mutex_lock(&tier_conf->pause_mutex); ++ if (is_promotion) ++ tier_conf->promote_in_progress = 0; ++ else ++ tier_conf->demote_in_progress = 0; ++ pthread_mutex_unlock(&tier_conf->pause_mutex); ++ ++ return ret; ++} ++ ++/* returns _gf_true: if file can be promoted ++ * returns _gf_false: if file cannot be promoted ++ */ ++static gf_boolean_t ++tier_can_promote_file(xlator_t *this, char const *file_name, ++ struct iatt *current, gf_defrag_info_t *defrag) ++{ ++ gf_boolean_t ret = _gf_false; ++ fsblkcnt_t estimated_usage = 0; ++ ++ if (defrag->tier_conf.tier_max_promote_size && ++ (current->ia_size > defrag->tier_conf.tier_max_promote_size)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "File %s (gfid:%s) with size (%" PRIu64 ++ ") exceeds maxsize " ++ "(%d) for promotion. File will not be promoted.", ++ file_name, uuid_utoa(current->ia_gfid), current->ia_size, ++ defrag->tier_conf.tier_max_promote_size); ++ goto err; ++ } ++ ++ /* bypass further validations for TEST mode */ ++ if (defrag->tier_conf.mode != TIER_MODE_WM) { ++ ret = _gf_true; ++ goto err; ++ } ++ ++ /* convert the file size to blocks as per the block size of the ++ * destination tier ++ * NOTE: add (block_size - 1) to get the correct block size when ++ * there is a remainder after a modulo ++ */ ++ estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) / ++ defrag->tier_conf.block_size) + ++ defrag->tier_conf.blocks_used; ++ ++ /* test if the estimated block usage goes above HI watermark */ ++ if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >= ++ defrag->tier_conf.watermark_hi) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Estimated block count consumption on " ++ "hot tier (%" PRIu64 ++ ") exceeds hi watermark (%d%%). " ++ "File will not be promoted.", ++ estimated_usage, defrag->tier_conf.watermark_hi); ++ goto err; ++ } ++ ret = _gf_true; ++err: ++ return ret; ++} ++ ++static int ++tier_set_migrate_data(dict_t *migrate_data) ++{ ++ int failed = 1; ++ ++ failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force"); ++ if (failed) { ++ goto bail_out; ++ } ++ ++ /* Flag to suggest the xattr call is from migrator */ ++ failed = dict_set_str(migrate_data, "from.migrator", "yes"); ++ if (failed) { ++ goto bail_out; ++ } ++ ++ /* Flag to suggest its a tiering migration ++ * The reason for this dic key-value is that ++ * promotions and demotions are multithreaded ++ * so the original frame from gf_defrag_start() ++ * is not carried. A new frame will be created when ++ * we do syncop_setxattr(). This does not have the ++ * frame->root->pid of the original frame. 
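++     * (the internal pid is what lets brick-side translators tell
++     * this rebalance-driven I/O apart from ordinary client I/O)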
So we pass ++ * this dic key-value when we do syncop_setxattr() to do ++ * data migration and set the frame->root->pid to ++ * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before ++ * calling dht_start_rebalance_task() */ ++ failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes"); ++ if (failed) { ++ goto bail_out; ++ } ++ ++ failed = 0; ++ ++bail_out: ++ return failed; ++} ++ ++static char * ++tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf, ++ int *per_link_status) ++{ ++ int ret = -1; ++ char *parent_path = NULL; ++ dict_t *xdata_request = NULL; ++ dict_t *xdata_response = NULL; ++ ++ xdata_request = dict_new(); ++ if (!xdata_request) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to create xdata_request dict"); ++ goto err; ++ } ++ ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to set value to dict : key %s \n", ++ GET_ANCESTRY_PATH_KEY); ++ goto err; ++ } ++ ++ ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request, ++ &xdata_response); ++ /* When the parent gfid is a stale entry, the lookup ++ * will fail and stop the demotion process. ++ * The parent gfid can be stale when a huge folder is ++ * deleted while the files within it are being migrated ++ */ ++ if (ret == -ESTALE) { ++ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, ++ "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid)); ++ *per_link_status = 1; ++ goto err; ++ } else if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, ++ "Error in parent lookup for %s", uuid_utoa(p_loc->gfid)); ++ *per_link_status = -1; ++ goto err; ++ } ++ ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path); ++ if (ret || !parent_path) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to get parent path for %s", uuid_utoa(p_loc->gfid)); ++ *per_link_status = -1; ++ goto err; ++ } ++ ++err: ++ if (xdata_request) { ++ dict_unref(xdata_request); ++ } ++ ++ if (xdata_response) { ++ dict_unref(xdata_response); ++ xdata_response = NULL; ++ } ++ ++ return parent_path; ++} ++ ++static int ++tier_get_file_name_and_path(xlator_t *this, uuid_t gfid, ++ gfdb_link_info_t *link_info, ++ char const *parent_path, loc_t *loc, ++ int *per_link_status) ++{ ++ int ret = -1; ++ ++ loc->name = gf_strdup(link_info->file_name); ++ if (!loc->name) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Memory " ++ "allocation failed for %s", ++ uuid_utoa(gfid)); ++ *per_link_status = -1; ++ goto err; ++ } ++ ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to " ++ "construct file path for %s %s\n", ++ parent_path, loc->name); ++ *per_link_status = -1; ++ goto err; ++ } ++ ++ ret = 0; ++ ++err: ++ return ret; ++} ++ ++static int ++tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current, ++ int *per_link_status) ++{ ++ int ret = -1; ++ ++ ret = syncop_lookup(this, loc, current, NULL, NULL, NULL); ++ ++ /* The file may be deleted even when the parent ++ * is available and the lookup will ++ * return a stale entry which would stop the ++ * migration. so if its a stale entry, then skip ++ * the file and keep migrating. 
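++     * Only -ESTALE is skippable (per_link_status = 1, i.e. the
++     * attempt is ignored rather than counted); any other lookup
++     * failure is a hard error (per_link_status = -1) for this file.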
++ */ ++ if (ret == -ESTALE) { ++ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, ++ "Stale lookup for %s", uuid_utoa(p_loc->gfid)); ++ *per_link_status = 1; ++ goto err; ++ } else if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to " ++ "lookup file %s\n", ++ loc->name); ++ *per_link_status = -1; ++ goto err; ++ } ++ ret = 0; ++ ++err: ++ return ret; ++} ++ ++static gf_boolean_t ++tier_is_file_already_at_destination(xlator_t *src_subvol, ++ query_cbk_args_t *query_cbk_args, ++ dht_conf_t *conf, int *per_link_status) ++{ ++ gf_boolean_t at_destination = _gf_true; ++ ++ if (src_subvol == NULL) { ++ *per_link_status = 1; ++ goto err; ++ } ++ if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) { ++ *per_link_status = 1; ++ goto err; ++ } ++ ++ if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) { ++ *per_link_status = 1; ++ goto err; ++ } ++ at_destination = _gf_false; ++ ++err: ++ return at_destination; ++} ++ ++static void ++tier_update_migration_counters(query_cbk_args_t *query_cbk_args, ++ gf_defrag_info_t *defrag, ++ uint64_t *total_migrated_bytes, int *total_files) ++{ ++ if (query_cbk_args->is_promotion) { ++ defrag->total_files_promoted++; ++ *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size; ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->tier_conf.blocks_used += defrag->tier_conf ++ .st_last_promoted_size; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } else { ++ defrag->total_files_demoted++; ++ *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size; ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } ++ if (defrag->tier_conf.blocks_total) { ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->tier_conf.percent_full = GF_PERCENTAGE( ++ defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total); ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } ++ ++ (*total_files)++; ++} ++ ++static int ++tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid, ++ gfdb_link_info_t *link_info, gf_defrag_info_t *defrag, ++ query_cbk_args_t *query_cbk_args, dict_t *migrate_data, ++ int *per_link_status, int *total_files, ++ uint64_t *total_migrated_bytes) ++{ ++ int ret = -1; ++ struct iatt current = { ++ 0, ++ }; ++ struct iatt par_stbuf = { ++ 0, ++ }; ++ loc_t p_loc = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ xlator_t *src_subvol = NULL; ++ inode_t *linked_inode = NULL; ++ char *parent_path = NULL; ++ ++ /* Lookup for parent and get the path of parent */ ++ gf_uuid_copy(p_loc.gfid, link_info->pargfid); ++ p_loc.inode = inode_new(defrag->root_inode->table); ++ if (!p_loc.inode) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to create reference to inode" ++ " for %s", ++ uuid_utoa(p_loc.gfid)); ++ ++ *per_link_status = -1; ++ goto err; ++ } ++ ++ parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf, ++ per_link_status); ++ if (!parent_path) { ++ goto err; ++ } ++ ++ linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf); ++ inode_unref(p_loc.inode); ++ p_loc.inode = linked_inode; ++ ++ /* Preparing File Inode */ ++ gf_uuid_copy(loc.gfid, gfid); ++ loc.inode = inode_new(defrag->root_inode->table); ++ gf_uuid_copy(loc.pargfid, link_info->pargfid); ++ loc.parent = inode_ref(p_loc.inode); ++ ++ /* Get filename and Construct file path */ ++ if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc, ++ 
per_link_status) != 0) { ++ goto err; ++ } ++ gf_uuid_copy(loc.parent->gfid, link_info->pargfid); ++ ++ /* lookup file inode */ ++ if (tier_lookup_file(this, &p_loc, &loc, ¤t, per_link_status) != 0) { ++ goto err; ++ } ++ ++ if (query_cbk_args->is_promotion) { ++ if (!tier_can_promote_file(this, link_info->file_name, ¤t, ++ defrag)) { ++ *per_link_status = 1; ++ goto err; ++ } ++ } ++ ++ linked_inode = inode_link(loc.inode, NULL, NULL, ¤t); ++ inode_unref(loc.inode); ++ loc.inode = linked_inode; ++ ++ /* ++ * Do not promote/demote if file already is where it ++ * should be. It means another brick moved the file ++ * so is not an error. So we set per_link_status = 1 ++ * so that we ignore counting this. ++ */ ++ src_subvol = dht_subvol_get_cached(this, loc.inode); ++ ++ if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf, ++ per_link_status)) { ++ goto err; ++ } ++ ++ gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s", ++ (query_cbk_args->is_promotion ? "promote" : "demote"), ++ src_subvol->name, loc.path); ++ ++ ret = tier_check_same_node(this, &loc, defrag); ++ if (ret != 0) { ++ if (ret < 0) { ++ *per_link_status = -1; ++ goto err; ++ } ++ ret = 0; ++ /* By setting per_link_status to 1 we are ++ * ignoring this status and will not be counting ++ * this file for migration */ ++ *per_link_status = 1; ++ goto err; ++ } ++ ++ gf_uuid_copy(loc.gfid, loc.inode->gfid); ++ ++ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Tiering paused. " ++ "Exiting tier_migrate_link"); ++ goto err; ++ } ++ ++ ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc, ++ &defrag->tier_conf); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to " ++ "migrate %s ", ++ loc.path); ++ *per_link_status = -1; ++ goto err; ++ } ++ ++ tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes, ++ total_files); ++ ++ ret = 0; ++ ++err: ++ GF_FREE((char *)loc.name); ++ loc.name = NULL; ++ loc_wipe(&loc); ++ loc_wipe(&p_loc); ++ ++ if ((*total_files >= defrag->tier_conf.max_migrate_files) || ++ (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Reached cycle migration limit." 
++ "migrated bytes %" PRId64 " files %d", ++ *total_migrated_bytes, *total_files); ++ ret = -1; ++ } ++ ++ return ret; ++} ++ ++static int ++tier_migrate_using_query_file(void *_args) ++{ ++ int ret = -1; ++ query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args; ++ xlator_t *this = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ gfdb_query_record_t *query_record = NULL; ++ gfdb_link_info_t *link_info = NULL; ++ dict_t *migrate_data = NULL; ++ /* ++ * per_file_status and per_link_status ++ * 0 : success ++ * -1 : failure ++ * 1 : ignore the status and don't count for migration ++ * */ ++ int per_file_status = 0; ++ int per_link_status = 0; ++ int total_status = 0; ++ dht_conf_t *conf = NULL; ++ uint64_t total_migrated_bytes = 0; ++ int total_files = 0; ++ loc_t root_loc = {0}; ++ gfdb_time_t start_time = {0}; ++ gfdb_time_t current_time = {0}; ++ int total_time = 0; ++ int max_time = 0; ++ gf_boolean_t emergency_demote_mode = _gf_false; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); ++ this = query_cbk_args->this; ++ GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out); ++ GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out); ++ GF_VALIDATE_OR_GOTO(this->name, this->private, out); ++ ++ conf = this->private; ++ ++ defrag = query_cbk_args->defrag; ++ migrate_data = dict_new(); ++ if (!migrate_data) ++ goto out; ++ ++ emergency_demote_mode = (!query_cbk_args->is_promotion && ++ is_hot_tier_full(&defrag->tier_conf)); ++ ++ if (tier_set_migrate_data(migrate_data) != 0) { ++ goto out; ++ } ++ ++ dht_build_root_loc(defrag->root_inode, &root_loc); ++ ++ ret = gettimeofday(&start_time, NULL); ++ if (query_cbk_args->is_promotion) { ++ max_time = defrag->tier_conf.tier_promote_frequency; ++ } else { ++ max_time = defrag->tier_conf.tier_demote_frequency; ++ } ++ ++ /* Per file */ ++ while ((ret = read_query_record_list(query_cbk_args->qfile_array, ++ &query_record)) != 0) { ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to fetch query record " ++ "from query file"); ++ goto out; ++ } ++ ++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ++ ret = -1; ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Exiting tier migration as" ++ "defrag status is not started"); ++ goto out; ++ } ++ ++ ret = gettimeofday(¤t_time, NULL); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Could not get current time."); ++ goto out; ++ } ++ ++ total_time = current_time.tv_sec - start_time.tv_sec; ++ if (total_time > max_time) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Max cycle time reached. Exiting migration."); ++ goto out; ++ } ++ ++ per_file_status = 0; ++ per_link_status = 0; ++ ++ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Tiering paused. " ++ "Exiting tier_migrate_using_query_file"); ++ break; ++ } ++ ++ if (defrag->tier_conf.mode == TIER_MODE_WM) { ++ ret = tier_get_fs_stat(this, &root_loc); ++ if (ret != 0) { ++ gfdb_methods.gfdb_query_record_free(query_record); ++ query_record = NULL; ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "tier_get_fs_stat() FAILED ... 
" ++ "skipping file migrations until next cycle"); ++ break; ++ } ++ ++ if (!tier_do_migration(this, query_cbk_args->is_promotion)) { ++ gfdb_methods.gfdb_query_record_free(query_record); ++ query_record = NULL; ++ ++ /* We have crossed the high watermark. Stop processing ++ * files if this is a promotion cycle so demotion gets ++ * a chance to start if not already running*/ ++ ++ if (query_cbk_args->is_promotion && ++ is_hot_tier_full(&defrag->tier_conf)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "High watermark crossed during " ++ "promotion. Exiting " ++ "tier_migrate_using_query_file"); ++ break; ++ } ++ continue; ++ } ++ } ++ ++ per_link_status = 0; ++ ++ /* For now we only support single link migration. And we will ++ * ignore other hard links in the link info list of query record ++ * TODO: Multiple hard links migration */ ++ if (!list_empty(&query_record->link_list)) { ++ link_info = list_first_entry(&query_record->link_list, ++ gfdb_link_info_t, list); ++ } ++ if (link_info != NULL) { ++ if (tier_migrate_link(this, conf, query_record->gfid, link_info, ++ defrag, query_cbk_args, migrate_data, ++ &per_link_status, &total_files, ++ &total_migrated_bytes) != 0) { ++ gf_msg( ++ this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "%s failed for %s(gfid:%s)", ++ (query_cbk_args->is_promotion ? "Promotion" : "Demotion"), ++ link_info->file_name, uuid_utoa(query_record->gfid)); ++ } ++ } ++ per_file_status = per_link_status; ++ ++ if (per_file_status < 0) { /* Failure */ ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->total_failures++; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } else if (per_file_status == 0) { /* Success */ ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->total_files++; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } else if (per_file_status == 1) { /* Ignore */ ++ per_file_status = 0; ++ /* Since this attempt was ignored we ++ * decrement the lookup count*/ ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->num_files_lookedup--; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } ++ total_status = total_status + per_file_status; ++ per_link_status = 0; ++ per_file_status = 0; ++ ++ gfdb_methods.gfdb_query_record_free(query_record); ++ query_record = NULL; ++ ++ /* If we are demoting and the entry watermark was HI, then ++ * we are done with emergency demotions if the current ++ * watermark has fallen below hi-watermark level ++ */ ++ if (emergency_demote_mode) { ++ if (tier_check_watermark(this) == 0) { ++ if (!is_hot_tier_full(&defrag->tier_conf)) { ++ break; ++ } ++ } ++ } ++ } ++ ++out: ++ if (migrate_data) ++ dict_unref(migrate_data); ++ ++ gfdb_methods.gfdb_query_record_free(query_record); ++ query_record = NULL; ++ ++ return total_status; ++} ++ ++/* This is the call back function per record/file from data base */ ++static int ++tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args) ++{ ++ int ret = -1; ++ query_cbk_args_t *query_cbk_args = _args; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out); ++ GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out); ++ ++ ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd, ++ gfdb_query_record); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed writing query record to query file"); ++ goto out; ++ } ++ ++ pthread_mutex_lock(&dm_stat_mutex); ++ query_cbk_args->defrag->num_files_lookedup++; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ ++ ret = 0; ++out: ++ 
return ret; ++} ++ ++/* Create query file in tier process */ ++static int ++tier_process_self_query(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ char *db_path = NULL; ++ query_cbk_args_t *query_cbk_args = NULL; ++ xlator_t *this = NULL; ++ gfdb_conn_node_t *conn_node = NULL; ++ dict_t *params_dict = NULL; ++ dict_t *ctr_ipc_dict = NULL; ++ gfdb_brick_info_t *gfdb_brick_info = args; ++ ++ /*Init of all the essentials*/ ++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); ++ query_cbk_args = gfdb_brick_info->_query_cbk_args; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); ++ this = query_cbk_args->this; ++ ++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); ++ ++ db_path = local_brick->brick_db_path; ++ ++ /*Preparing DB parameters before init_db i.e getting db connection*/ ++ params_dict = dict_new(); ++ if (!params_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "DB Params cannot initialized"); ++ goto out; ++ } ++ SET_DB_PARAM_TO_DICT(this->name, params_dict, ++ (char *)gfdb_methods.get_db_path_key(), db_path, ret, ++ out); ++ ++ /*Get the db connection*/ ++ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); ++ if (!conn_node) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "FATAL: Failed initializing db operations"); ++ goto out; ++ } ++ ++ /* Query for eligible files from db */ ++ query_cbk_args->query_fd = open(local_brick->qfile_path, ++ O_WRONLY | O_CREAT | O_APPEND, ++ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); ++ if (query_cbk_args->query_fd < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to open query file %s", local_brick->qfile_path); ++ goto out; ++ } ++ if (!gfdb_brick_info->_gfdb_promote) { ++ if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) { ++ /* emergency demotion mode */ ++ ret = gfdb_methods.find_all( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ query_cbk_args->defrag->tier_conf.query_limit); ++ } else { ++ if (query_cbk_args->defrag->write_freq_threshold == 0 && ++ query_cbk_args->defrag->read_freq_threshold == 0) { ++ ret = gfdb_methods.find_unchanged_for_time( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ gfdb_brick_info->time_stamp); ++ } else { ++ ret = gfdb_methods.find_unchanged_for_time_freq( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ gfdb_brick_info->time_stamp, ++ query_cbk_args->defrag->write_freq_threshold, ++ query_cbk_args->defrag->read_freq_threshold, _gf_false); ++ } ++ } ++ } else { ++ if (query_cbk_args->defrag->write_freq_threshold == 0 && ++ query_cbk_args->defrag->read_freq_threshold == 0) { ++ ret = gfdb_methods.find_recently_changed_files( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ gfdb_brick_info->time_stamp); ++ } else { ++ ret = gfdb_methods.find_recently_changed_files_freq( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ gfdb_brick_info->time_stamp, ++ query_cbk_args->defrag->write_freq_threshold, ++ query_cbk_args->defrag->read_freq_threshold, _gf_false); ++ } ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "FATAL: query from db failed"); ++ goto out; ++ } ++ ++ /*Clear the heat on the DB entries*/ ++ /*Preparing ctr_ipc_dict*/ ++ 
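++    /* Resetting the heat means the next cycle only sees accesses
++     * made after this query ran.  The IPC below is essentially:
++     *
++     *   dict_set_str(d, GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_CLEAR_OPS);
++     *   syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, d, NULL);
++     */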
ctr_ipc_dict = dict_new(); ++ if (!ctr_ipc_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "ctr_ipc_dict cannot initialized"); ++ goto out; ++ } ++ ++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, ++ GFDB_IPC_CTR_CLEAR_OPS, ret, out); ++ ++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, ++ NULL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed clearing the heat " ++ "on db %s error %d", ++ local_brick->brick_db_path, ret); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ if (params_dict) { ++ dict_unref(params_dict); ++ params_dict = NULL; ++ } ++ ++ if (ctr_ipc_dict) { ++ dict_unref(ctr_ipc_dict); ++ ctr_ipc_dict = NULL; ++ } ++ ++ if (query_cbk_args && query_cbk_args->query_fd >= 0) { ++ sys_close(query_cbk_args->query_fd); ++ query_cbk_args->query_fd = -1; ++ } ++ gfdb_methods.fini_db(conn_node); ++ ++ return ret; ++} ++ ++/*Ask CTR to create the query file*/ ++static int ++tier_process_ctr_query(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ query_cbk_args_t *query_cbk_args = NULL; ++ xlator_t *this = NULL; ++ dict_t *ctr_ipc_in_dict = NULL; ++ dict_t *ctr_ipc_out_dict = NULL; ++ gfdb_brick_info_t *gfdb_brick_info = args; ++ gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; ++ int count = 0; ++ ++ /*Init of all the essentials*/ ++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); ++ query_cbk_args = gfdb_brick_info->_query_cbk_args; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); ++ this = query_cbk_args->this; ++ ++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); ++ ++ /*Preparing ctr_ipc_in_dict*/ ++ ctr_ipc_in_dict = dict_new(); ++ if (!ctr_ipc_in_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "ctr_ipc_in_dict cannot initialized"); ++ goto out; ++ } ++ ++ ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t), ++ gf_tier_mt_ipc_ctr_params_t); ++ if (!ipc_ctr_params) { ++ goto out; ++ } ++ ++ /* set all the query params*/ ++ ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote; ++ ++ ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag ++ ->write_freq_threshold; ++ ++ ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag ++ ->read_freq_threshold; ++ ++ ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit; ++ ++ ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote && ++ query_cbk_args->defrag->tier_conf ++ .watermark_last == TIER_WM_HI); ++ ++ memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp, ++ sizeof(gfdb_time_t)); ++ ++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, ++ GFDB_IPC_CTR_QUERY_OPS, ret, out); ++ ++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, ++ GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path, ++ ret, out); ++ ++ ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, ++ ipc_ctr_params, sizeof(*ipc_ctr_params)); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed setting %s to params dictionary", ++ GFDB_IPC_CTR_GET_QUERY_PARAMS); ++ GF_FREE(ipc_ctr_params); ++ goto out; ++ } ++ ipc_ctr_params = NULL; ++ ++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict, ++ &ctr_ipc_out_dict); ++ if (ret) { ++ gf_msg(this->name, 
GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR, ++ "Failed query on %s ret %d", local_brick->brick_db_path, ret); ++ goto out; ++ } ++ ++ ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ++ &count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed getting count " ++ "of records on %s", ++ local_brick->brick_db_path); ++ goto out; ++ } ++ ++ if (count < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed query on %s", local_brick->brick_db_path); ++ ret = -1; ++ goto out; ++ } ++ ++ pthread_mutex_lock(&dm_stat_mutex); ++ query_cbk_args->defrag->num_files_lookedup = count; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ ++ ret = 0; ++out: ++ ++ if (ctr_ipc_in_dict) { ++ dict_unref(ctr_ipc_in_dict); ++ ctr_ipc_in_dict = NULL; ++ } ++ ++ if (ctr_ipc_out_dict) { ++ dict_unref(ctr_ipc_out_dict); ++ ctr_ipc_out_dict = NULL; ++ } ++ ++ GF_FREE(ipc_ctr_params); ++ ++ return ret; ++} ++ ++/* This is the call back function for each brick from hot/cold bricklist ++ * It picks up each bricks db and queries for eligible files for migration. ++ * The list of eligible files are populated in appropriate query files*/ ++static int ++tier_process_brick(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ dict_t *ctr_ipc_in_dict = NULL; ++ dict_t *ctr_ipc_out_dict = NULL; ++ char *strval = NULL; ++ ++ GF_VALIDATE_OR_GOTO("tier", local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); ++ ++ if (dht_tier_db_type == GFDB_SQLITE3) { ++ /*Preparing ctr_ipc_in_dict*/ ++ ctr_ipc_in_dict = dict_new(); ++ if (!ctr_ipc_in_dict) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "ctr_ipc_in_dict cannot initialized"); ++ goto out; ++ } ++ ++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, ++ GFDB_IPC_CTR_GET_DB_PARAM_OPS); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ GFDB_IPC_CTR_KEY); ++ goto out; ++ } ++ ++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, ""); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ GFDB_IPC_CTR_GET_DB_PARAM_OPS); ++ goto out; ++ } ++ ++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY, ++ "journal_mode"); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ GFDB_IPC_CTR_GET_DB_KEY); ++ goto out; ++ } ++ ++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ++ ctr_ipc_in_dict, &ctr_ipc_out_dict); ++ if (ret || ctr_ipc_out_dict == NULL) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to get " ++ "journal_mode of sql db %s", ++ local_brick->brick_db_path); ++ goto out; ++ } ++ ++ ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED, ++ "Failed to get %s " ++ "from params dictionary" ++ "journal_mode", ++ strval); ++ goto out; ++ } ++ ++ if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) { ++ ret = tier_process_self_query(local_brick, args); ++ if (ret) { ++ goto out; ++ } ++ } else { ++ ret = tier_process_ctr_query(local_brick, args); ++ if (ret) { ++ goto out; ++ } ++ } ++ ret = 0; ++ ++ } else { ++ ret = tier_process_self_query(local_brick, args); ++ if (ret) { ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ if (ctr_ipc_in_dict) ++ dict_unref(ctr_ipc_in_dict); ++ ++ if 
(ctr_ipc_out_dict) ++ dict_unref(ctr_ipc_out_dict); ++ ++ return ret; ++} ++ ++static int ++tier_build_migration_qfile(migration_args_t *args, ++ query_cbk_args_t *query_cbk_args, ++ gf_boolean_t is_promotion) ++{ ++ gfdb_time_t current_time; ++ gfdb_brick_info_t gfdb_brick_info; ++ gfdb_time_t time_in_past; ++ int ret = -1; ++ tier_brick_list_t *local_brick = NULL; ++ int i = 0; ++ time_in_past.tv_sec = args->freq_time; ++ time_in_past.tv_usec = 0; ++ ++ ret = gettimeofday(&current_time, NULL); ++ if (ret == -1) { ++ gf_msg(args->this->name, GF_LOG_ERROR, errno, ++ DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time"); ++ goto out; ++ } ++ time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; ++ ++ /* The migration daemon may run a varying number of usec after the */ ++ /* sleep call triggers. A file may be registered in CTR some number */ ++ /* of usec X after the daemon started and missed in the subsequent */ ++ /* cycle if the daemon starts Y usec after the period in seconds */ ++ /* where Y>X. Normalize away this problem by always setting usec */ ++ /* to 0. */ ++ time_in_past.tv_usec = 0; ++ ++ gfdb_brick_info.time_stamp = &time_in_past; ++ gfdb_brick_info._gfdb_promote = is_promotion; ++ gfdb_brick_info._query_cbk_args = query_cbk_args; ++ ++ list_for_each_entry(local_brick, args->brick_list, list) ++ { ++ /* Construct query file path for this brick ++ * i.e. ++ * /var/run/gluster/xlator_name/ ++ * {promote/demote}-brickname-indexinbricklist ++ * so that no two query files will have the same path even if ++ * bricks have the same name ++ * */ ++ snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", ++ GET_QFILE_PATH(gfdb_brick_info._gfdb_promote), ++ local_brick->brick_name, i); ++ ++ /* Delete any old query files for this brick */ ++ sys_unlink(local_brick->qfile_path); ++ ++ ret = tier_process_brick(local_brick, &gfdb_brick_info); ++ if (ret) { ++ gf_msg(args->this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n", ++ local_brick->brick_db_path); ++ } ++ i++; ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++static int ++tier_migrate_files_using_qfile(migration_args_t *comp, ++ query_cbk_args_t *query_cbk_args) ++{ ++ int ret = -1; ++ tier_brick_list_t *local_brick = NULL; ++ tier_brick_list_t *temp = NULL; ++ gfdb_time_t current_time = { ++ 0, ++ }; ++ ssize_t qfile_array_size = 0; ++ int count = 0; ++ int temp_fd = 0; ++ gf_tier_conf_t *tier_conf = NULL; ++ ++ tier_conf = &(query_cbk_args->defrag->tier_conf); ++ ++ /* Time for error query files */ ++ gettimeofday(&current_time, NULL); ++ ++ /* Build the qfile list */ ++ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) ++ { ++ qfile_array_size++; ++ } ++ query_cbk_args->qfile_array = qfile_array_new(qfile_array_size); ++ if (!query_cbk_args->qfile_array) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to create new " ++ "qfile_array"); ++ goto out; ++ } ++ ++ /*Open all qfiles*/ ++ count = 0; ++ query_cbk_args->qfile_array->exhausted_count = 0; ++ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) ++ { ++ temp_fd = query_cbk_args->qfile_array->fd_array[count]; ++ temp_fd = open(local_brick->qfile_path, O_RDONLY, ++ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); ++ if (temp_fd < 0) { ++ gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to open " ++ "query file %s", ++ local_brick->qfile_path); ++ query_cbk_args->qfile_array->exhausted_count++; ++ } ++ query_cbk_args->qfile_array->fd_array[count] = temp_fd; ++ count++;
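++ /* Note: a failed open is tolerated here; the negative fd stays in ++ * fd_array and exhausted_count lets the query file reader skip this ++ * brick. */ ++ 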
} ++ ++ /* Moving the query file index to the next one, so that we won't pick ++ * the same query file as the first one every cycle */ ++ query_cbk_args->qfile_array ++ ->next_index = (query_cbk_args->is_promotion) ++ ? tier_conf->last_promote_qfile_index ++ : tier_conf->last_demote_qfile_index; ++ shift_next_index(query_cbk_args->qfile_array); ++ if (query_cbk_args->is_promotion) { ++ tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array ++ ->next_index; ++ } else { ++ tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array ++ ->next_index; ++ } ++ ++ /* Migrate files using query file list */ ++ ret = tier_migrate_using_query_file((void *)query_cbk_args); ++out: ++ qfile_array_free(query_cbk_args->qfile_array); ++ ++ /* If there is an error, rename all the query files to .err files ++ * with a timestamp for better debugging */ ++ if (ret) { ++ struct tm tm = { ++ 0, ++ }; ++ char time_str[128] = { ++ 0, ++ }; ++ char query_file_path_err[PATH_MAX] = { ++ 0, ++ }; ++ int32_t len = 0; ++ ++ /* Time format for error query files */ ++ gmtime_r(&current_time.tv_sec, &tm); ++ strftime(time_str, sizeof(time_str), "%F-%T", &tm); ++ ++ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) ++ { ++ /* rename error qfile*/ ++ len = snprintf(query_file_path_err, sizeof(query_file_path_err), ++ "%s-%s.err", local_brick->qfile_path, time_str); ++ if ((len >= 0) && (len < sizeof(query_file_path_err))) { ++ if (sys_rename(local_brick->qfile_path, query_file_path_err) == ++ -1) ++ gf_msg_debug("tier", 0, ++ "rename " ++ "failed"); ++ } ++ } ++ } ++ ++ query_cbk_args->qfile_array = NULL; ++ ++ return ret; ++} ++ ++int ++tier_demote(migration_args_t *demotion_args) ++{ ++ query_cbk_args_t query_cbk_args; ++ int ret = -1; ++ ++ GF_VALIDATE_OR_GOTO("tier", demotion_args, out); ++ GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out); ++ GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list, ++ out); ++ GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out); ++ ++ THIS = demotion_args->this; ++ ++ query_cbk_args.this = demotion_args->this; ++ query_cbk_args.defrag = demotion_args->defrag; ++ query_cbk_args.is_promotion = 0; ++ ++ /*Build the query file using bricklist*/ ++ ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false); ++ if (ret) ++ goto out; ++ ++ /* Migrate files using the query file */ ++ ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args); ++ if (ret) ++ goto out; ++ ++out: ++ demotion_args->return_value = ret; ++ return ret; ++} ++ ++int ++tier_promote(migration_args_t *promotion_args) ++{ ++ int ret = -1; ++ query_cbk_args_t query_cbk_args; ++ ++ GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out); ++ GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list, ++ out); ++ GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag, ++ out); ++ ++ THIS = promotion_args->this; ++ ++ query_cbk_args.this = promotion_args->this; ++ query_cbk_args.defrag = promotion_args->defrag; ++ query_cbk_args.is_promotion = 1; ++ ++ /*Build the query file using bricklist*/ ++ ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true); ++ if (ret) ++ goto out; ++ ++ /* Migrate files using the query file */ ++ ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args); ++ if (ret) ++ goto out; ++ ++out: ++ promotion_args->return_value = ret; ++ return ret; ++} ++ ++/* ++ * Command the CTR on a brick to compact the local database using an IPC ++ */ ++static int 
++tier_process_self_compact(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ char *db_path = NULL; ++ query_cbk_args_t *query_cbk_args = NULL; ++ xlator_t *this = NULL; ++ gfdb_conn_node_t *conn_node = NULL; ++ dict_t *params_dict = NULL; ++ dict_t *ctr_ipc_dict = NULL; ++ gfdb_brick_info_t *gfdb_brick_info = args; ++ ++ /*Init of all the essentials*/ ++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); ++ query_cbk_args = gfdb_brick_info->_query_cbk_args; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); ++ this = query_cbk_args->this; ++ ++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); ++ ++ db_path = local_brick->brick_db_path; ++ ++ /*Preparing DB parameters before init_db i.e getting db connection*/ ++ params_dict = dict_new(); ++ if (!params_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "DB Params cannot initialized"); ++ goto out; ++ } ++ SET_DB_PARAM_TO_DICT(this->name, params_dict, ++ (char *)gfdb_methods.get_db_path_key(), db_path, ret, ++ out); ++ ++ /*Get the db connection*/ ++ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); ++ if (!conn_node) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "FATAL: Failed initializing db operations"); ++ goto out; ++ } ++ ++ ret = 0; ++ ++ /*Preparing ctr_ipc_dict*/ ++ ctr_ipc_dict = dict_new(); ++ if (!ctr_ipc_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "ctr_ipc_dict cannot initialized"); ++ goto out; ++ } ++ ++ ret = dict_set_int32(ctr_ipc_dict, "compact_active", ++ query_cbk_args->defrag->tier_conf.compact_active); ++ ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ "compact_active"); ++ goto out; ++ } ++ ++ ret = dict_set_int32( ++ ctr_ipc_dict, "compact_mode_switched", ++ query_cbk_args->defrag->tier_conf.compact_mode_switched); ++ ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ "compact_mode_switched"); ++ goto out; ++ } ++ ++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, ++ GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out); ++ ++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Starting Compaction IPC"); ++ ++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, ++ NULL); ++ ++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Ending Compaction IPC"); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed compaction " ++ "on db %s error %d", ++ local_brick->brick_db_path, ret); ++ goto out; ++ } ++ ++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "SUCCESS: %s Compaction", local_brick->brick_name); ++ ++ ret = 0; ++out: ++ if (params_dict) { ++ dict_unref(params_dict); ++ params_dict = NULL; ++ } ++ ++ if (ctr_ipc_dict) { ++ dict_unref(ctr_ipc_dict); ++ ctr_ipc_dict = NULL; ++ } ++ ++ gfdb_methods.fini_db(conn_node); ++ ++ return ret; ++} ++ ++/* ++ * This is the call back function for each brick from hot/cold bricklist. ++ * It determines the database type on each brick and calls the corresponding ++ * function to prepare the compaction IPC. 
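++ * At present it unconditionally invokes tier_process_self_compact(), ++ * which drives the sqlite3 compaction path.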
++ */ ++static int ++tier_compact_db_brick(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ ++ GF_VALIDATE_OR_GOTO("tier", local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); ++ ++ ret = tier_process_self_compact(local_brick, args); ++ if (ret) { ++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Brick %s did not compact", local_brick->brick_name); ++ goto out; ++ } ++ ++ ret = 0; ++ ++out: ++ ++ return ret; ++} ++ ++static int ++tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args) ++{ ++ gfdb_time_t current_time; ++ gfdb_brick_info_t gfdb_brick_info; ++ gfdb_time_t time_in_past; ++ int ret = -1; ++ tier_brick_list_t *local_brick = NULL; ++ ++ time_in_past.tv_sec = args->freq_time; ++ time_in_past.tv_usec = 0; ++ ++ ret = gettimeofday(&current_time, NULL); ++ if (ret == -1) { ++ gf_msg(args->this->name, GF_LOG_ERROR, errno, ++ DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time"); ++ goto out; ++ } ++ time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; ++ ++ /* The migration daemon may run a varying number of usec after the sleep ++ call triggers. A file may be registered in CTR some number of usec X ++ after the daemon started and missed in the subsequent cycle if the ++ daemon starts Y usec after the period in seconds where Y>X. Normalize ++ away this problem by always setting usec to 0. */ ++ time_in_past.tv_usec = 0; ++ ++ gfdb_brick_info.time_stamp = &time_in_past; ++ ++ /* This is meant to say we are always compacting at this point */ ++ /* We simply borrow the promotion flag to do this */ ++ gfdb_brick_info._gfdb_promote = 1; ++ ++ gfdb_brick_info._query_cbk_args = query_cbk_args; ++ ++ list_for_each_entry(local_brick, args->brick_list, list) ++ { ++ gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Start compaction for %s", local_brick->brick_name); ++ ++ ret = tier_compact_db_brick(local_brick, &gfdb_brick_info); ++ if (ret) { ++ gf_msg(args->this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n", ++ local_brick->brick_db_path); ++ } ++ ++ gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "End compaction for %s", local_brick->brick_name); ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++static int ++tier_compact(void *args) ++{ ++ int ret = -1; ++ query_cbk_args_t query_cbk_args; ++ migration_args_t *compaction_args = args; ++ ++ GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out); ++ GF_VALIDATE_OR_GOTO(compaction_args->this->name, ++ compaction_args->brick_list, out); ++ GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag, ++ out); ++ ++ THIS = compaction_args->this; ++ ++ query_cbk_args.this = compaction_args->this; ++ query_cbk_args.defrag = compaction_args->defrag; ++ query_cbk_args.is_compaction = 1; ++ ++ /* Send the compaction pragma out to all the bricks on the bricklist. */ ++ /* tier_get_bricklist ensures all bricks on the list are local to */ ++ /* this node. 
*/ ++ ret = tier_send_compact(compaction_args, &query_cbk_args); ++ if (ret) ++ goto out; ++ ++ ret = 0; ++out: ++ compaction_args->return_value = ret; ++ return ret; ++} ++ ++static int ++tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head) ++{ ++ xlator_list_t *child = NULL; ++ char *rv = NULL; ++ char *rh = NULL; ++ char *brickname = NULL; ++ char db_name[PATH_MAX] = ""; ++ int ret = 0; ++ tier_brick_list_t *local_brick = NULL; ++ int32_t len = 0; ++ ++ GF_VALIDATE_OR_GOTO("tier", xl, out); ++ GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out); ++ ++ /* ++ * This function obtains remote subvolumes and filters out only ++ * those running on the same node as the tier daemon. ++ */ ++ if (strcmp(xl->type, "protocol/client") == 0) { ++ ret = dict_get_str(xl->options, "remote-host", &rh); ++ if (ret < 0) ++ goto out; ++ ++ if (gf_is_local_addr(rh)) { ++ local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t), ++ gf_tier_mt_bricklist_t); ++ if (!local_brick) { ++ goto out; ++ } ++ ++ ret = dict_get_str(xl->options, "remote-subvolume", &rv); ++ if (ret < 0) ++ goto out; ++ ++ brickname = strrchr(rv, '/') + 1; ++ snprintf(db_name, sizeof(db_name), "%s.db", brickname); ++ ++ local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char); ++ if (!local_brick->brick_db_path) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Failed to allocate memory for" ++ " bricklist."); ++ ret = -1; ++ goto out; ++ } ++ ++ len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv, ++ GF_HIDDEN_PATH, db_name); ++ if ((len < 0) || (len >= PATH_MAX)) { ++ gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS, ++ "DB path too long"); ++ ret = -1; ++ goto out; ++ } ++ ++ local_brick->xlator = xl; ++ ++ snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname); ++ ++ list_add_tail(&(local_brick->list), local_bricklist_head); ++ ++ ret = 0; ++ goto out; ++ } ++ } ++ ++ for (child = xl->children; child; child = child->next) { ++ ret = tier_get_bricklist(child->xlator, local_bricklist_head); ++ if (ret) { ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ ++ if (ret) { ++ if (local_brick) { ++ GF_FREE(local_brick->brick_db_path); ++ } ++ GF_FREE(local_brick); ++ } ++ ++ return ret; ++} ++ ++int ++tier_get_freq_demote(gf_tier_conf_t *tier_conf) ++{ ++ if ((tier_conf->mode == TIER_MODE_WM) && ++ (tier_conf->watermark_last == TIER_WM_HI)) ++ return DEFAULT_DEMOTE_DEGRADED; ++ else ++ return tier_conf->tier_demote_frequency; ++} ++ ++int ++tier_get_freq_promote(gf_tier_conf_t *tier_conf) ++{ ++ return tier_conf->tier_promote_frequency; ++} ++ ++int ++tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf) ++{ ++ return tier_conf->tier_compact_hot_frequency; ++} ++ ++int ++tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf) ++{ ++ return tier_conf->tier_compact_cold_frequency; ++} ++ ++static int ++tier_check_demote(gfdb_time_t current_time, int freq) ++{ ++ return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false; ++} ++ ++static gf_boolean_t ++tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, ++ int freq) ++{ ++ if ((tier_conf->mode == TIER_MODE_WM) && ++ (tier_conf->watermark_last == TIER_WM_HI)) ++ return _gf_false; ++ ++ else ++ return ((current_time.tv_sec % freq) == 0) ? 
_gf_true : _gf_false; ++} ++ ++static gf_boolean_t ++tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, ++ int freq_compact) ++{ ++ if (!(tier_conf->compact_active || tier_conf->compact_mode_switched)) ++ return _gf_false; ++ ++ return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false; ++} ++ ++void ++clear_bricklist(struct list_head *brick_list) ++{ ++ tier_brick_list_t *local_brick = NULL; ++ tier_brick_list_t *temp = NULL; ++ ++ if (list_empty(brick_list)) { ++ return; ++ } ++ ++ list_for_each_entry_safe(local_brick, temp, brick_list, list) ++ { ++ list_del(&local_brick->list); ++ GF_FREE(local_brick->brick_db_path); ++ GF_FREE(local_brick); ++ } ++} ++ ++static void ++set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold) ++{ ++ tier_brick_list_t *local_brick = NULL; ++ int i = 0; ++ ++ GF_VALIDATE_OR_GOTO("tier", brick_list, out); ++ ++ list_for_each_entry(local_brick, brick_list, list) ++ { ++ /* Construct query file path for this brick ++ * i.e ++ * /var/run/gluster/xlator_name/ ++ * {promote/demote}-brickname-indexinbricklist ++ * So that no two query files will have same path even ++ * bricks have the same name ++ * */ ++ snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", ++ GET_QFILE_PATH(is_cold), local_brick->brick_name, i); ++ i++; ++ } ++out: ++ return; ++} ++ ++static int ++tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time) ++{ ++ xlator_t *this = NULL; ++ dht_conf_t *conf = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ gf_tier_conf_t *tier_conf = NULL; ++ gf_boolean_t is_hot_tier = args->is_hot_tier; ++ int freq = 0; ++ int ret = -1; ++ const char *tier_type = is_hot_tier ? "hot" : "cold"; ++ ++ this = args->this; ++ ++ conf = this->private; ++ ++ defrag = conf->defrag; ++ ++ tier_conf = &defrag->tier_conf; ++ ++ freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf) ++ : tier_get_freq_compact_cold(tier_conf); ++ ++ defrag->tier_conf.compact_mode_switched = ++ is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot ++ : defrag->tier_conf.compact_mode_switched_cold; ++ ++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Compact mode %i", defrag->tier_conf.compact_mode_switched); ++ ++ if (tier_check_compact(tier_conf, current_time, freq)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Start compaction on %s tier", tier_type); ++ ++ args->freq_time = freq; ++ ret = tier_compact(args); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Compaction failed on " ++ "%s tier", ++ tier_type); ++ goto out; ++ } ++ ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "End compaction on %s tier", tier_type); ++ ++ if (is_hot_tier) { ++ defrag->tier_conf.compact_mode_switched_hot = _gf_false; ++ } else { ++ defrag->tier_conf.compact_mode_switched_cold = _gf_false; ++ } ++ } ++ ++out: ++ return ret; ++} ++ ++static int ++tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm) ++{ ++ if (mode == TIER_MODE_WM && wm == TIER_WM_HI) ++ return WM_INTERVAL_EMERG; ++ ++ return WM_INTERVAL; ++} ++ ++/* ++ * Main tiering loop. This is called from the promotion and the ++ * demotion threads spawned in tier_start(). ++ * ++ * Every second, wake from sleep to perform tasks. ++ * 1. Check trigger to migrate data. ++ * 2. Check for state changes (pause, unpause, stop). 
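++ * ++ * The same loop body serves the promotion, demotion and compaction ++ * threads; args->is_promotion and args->is_compaction select which ++ * kind of work runs on each tick.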
++ */ ++static void * ++tier_run(void *in_args) ++{ ++ dht_conf_t *conf = NULL; ++ gfdb_time_t current_time = {0}; ++ int freq = 0; ++ int ret = 0; ++ xlator_t *any = NULL; ++ xlator_t *xlator = NULL; ++ gf_tier_conf_t *tier_conf = NULL; ++ loc_t root_loc = {0}; ++ int check_watermark = 0; ++ gf_defrag_info_t *defrag = NULL; ++ xlator_t *this = NULL; ++ migration_args_t *args = in_args; ++ GF_VALIDATE_OR_GOTO("tier", args, out); ++ GF_VALIDATE_OR_GOTO("tier", args->brick_list, out); ++ ++ this = args->this; ++ GF_VALIDATE_OR_GOTO("tier", this, out); ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO("tier", conf, out); ++ ++ defrag = conf->defrag; ++ GF_VALIDATE_OR_GOTO("tier", defrag, out); ++ ++ if (list_empty(args->brick_list)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Brick list for tier is empty. Exiting."); ++ goto out; ++ } ++ ++ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; ++ tier_conf = &defrag->tier_conf; ++ ++ dht_build_root_loc(defrag->root_inode, &root_loc); ++ ++ while (1) { ++ /* ++ * Check if a graph switch occurred. If so, stop the migration ++ * thread. It will need to be restarted manually. ++ */ ++ any = THIS->ctx->active->first; ++ xlator = xlator_search_by_name(any, this->name); ++ ++ if (xlator != this) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Detected graph switch. Exiting migration " ++ "daemon."); ++ goto out; ++ } ++ ++ gf_defrag_check_pause_tier(tier_conf); ++ ++ sleep(1); ++ ++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ++ ret = 1; ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "defrag->defrag_status != " ++ "GF_DEFRAG_STATUS_STARTED"); ++ goto out; ++ } ++ ++ if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || ++ defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { ++ ret = 0; ++ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; ++ gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR, ++ "defrag->defrag_cmd == " ++ "GF_DEFRAG_CMD_START_DETACH_TIER"); ++ goto out; ++ } ++ ++ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) ++ continue; ++ ++ /* To have proper synchronization amongst all ++ * brick holding nodes, so that promotion and demotions ++ * start atomically w.r.t promotion/demotion frequency ++ * period, all nodes should have their system time ++ * in-sync with each other either manually set or ++ * using an NTP server*/ ++ ret = gettimeofday(&current_time, NULL); ++ if (ret == -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, ++ DHT_MSG_SYS_CALL_GET_TIME_FAILED, ++ "Failed to get current time"); ++ goto out; ++ } ++ ++ check_watermark++; ++ ++ /* emergency demotion requires frequent watermark monitoring */ ++ if (check_watermark >= ++ tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) { ++ check_watermark = 0; ++ if (tier_conf->mode == TIER_MODE_WM) { ++ ret = tier_get_fs_stat(this, &root_loc); ++ if (ret != 0) { ++ continue; ++ } ++ ret = tier_check_watermark(this); ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_CRITICAL, errno, ++ DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark"); ++ continue; ++ } ++ } ++ } ++ ++ if (args->is_promotion) { ++ freq = tier_get_freq_promote(tier_conf); ++ ++ if (tier_check_promote(tier_conf, current_time, freq)) { ++ args->freq_time = freq; ++ ret = tier_promote(args); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Promotion failed"); ++ } ++ } ++ } else if (args->is_compaction) { ++ tier_prepare_compact(args, current_time); ++ } else { ++ freq = 
tier_get_freq_demote(tier_conf); ++ ++ if (tier_check_demote(current_time, freq)) { ++ args->freq_time = freq; ++ ret = tier_demote(args); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Demotion failed"); ++ } ++ } ++ } ++ ++ /* Check the statfs immediately after the processing threads ++ return */ ++ check_watermark = WM_INTERVAL; ++ } ++ ++ ret = 0; ++out: ++ ++ args->return_value = ret; ++ ++ return NULL; ++} ++ ++int ++tier_start(xlator_t *this, gf_defrag_info_t *defrag) ++{ ++ pthread_t promote_thread; ++ pthread_t demote_thread; ++ pthread_t hot_compact_thread; ++ pthread_t cold_compact_thread; ++ int ret = -1; ++ struct list_head bricklist_hot = {0}; ++ struct list_head bricklist_cold = {0}; ++ migration_args_t promotion_args = {0}; ++ migration_args_t demotion_args = {0}; ++ migration_args_t hot_compaction_args = {0}; ++ migration_args_t cold_compaction_args = {0}; ++ dht_conf_t *conf = NULL; ++ ++ INIT_LIST_HEAD((&bricklist_hot)); ++ INIT_LIST_HEAD((&bricklist_cold)); ++ ++ conf = this->private; ++ ++ tier_get_bricklist(conf->subvolumes[1], &bricklist_hot); ++ set_brick_list_qpath(&bricklist_hot, _gf_false); ++ ++ demotion_args.this = this; ++ demotion_args.brick_list = &bricklist_hot; ++ demotion_args.defrag = defrag; ++ demotion_args.is_promotion = _gf_false; ++ demotion_args.is_compaction = _gf_false; ++ ++ ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args, ++ "tierdem"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to start demotion thread."); ++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ++ goto cleanup; ++ } ++ ++ tier_get_bricklist(conf->subvolumes[0], &bricklist_cold); ++ set_brick_list_qpath(&bricklist_cold, _gf_true); ++ ++ promotion_args.this = this; ++ promotion_args.brick_list = &bricklist_cold; ++ promotion_args.defrag = defrag; ++ promotion_args.is_promotion = _gf_true; ++ ++ ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args, ++ "tierpro"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to start promotion thread."); ++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ++ goto waitforspawned; ++ } ++ ++ hot_compaction_args.this = this; ++ hot_compaction_args.brick_list = &bricklist_hot; ++ hot_compaction_args.defrag = defrag; ++ hot_compaction_args.is_promotion = _gf_false; ++ hot_compaction_args.is_compaction = _gf_true; ++ hot_compaction_args.is_hot_tier = _gf_true; ++ ++ ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run, ++ &hot_compaction_args, "tierhcom"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to start compaction thread."); ++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ++ goto waitforspawnedpromote; ++ } ++ ++ cold_compaction_args.this = this; ++ cold_compaction_args.brick_list = &bricklist_cold; ++ cold_compaction_args.defrag = defrag; ++ cold_compaction_args.is_promotion = _gf_false; ++ cold_compaction_args.is_compaction = _gf_true; ++ cold_compaction_args.is_hot_tier = _gf_false; ++ ++ ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run, ++ &cold_compaction_args, "tierccom"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to start compaction thread."); ++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ++ goto waitforspawnedhotcompact; ++ } ++ pthread_join(cold_compact_thread, NULL); ++ ++waitforspawnedhotcompact: ++ pthread_join(hot_compact_thread, NULL); ++ 
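++ /* Each join label below doubles as a failure unwind point: when a ++ * later thread fails to start, control jumps here and the threads ++ * spawned so far are joined in reverse order. */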
++waitforspawnedpromote: ++ pthread_join(promote_thread, NULL); ++ ++waitforspawned: ++ pthread_join(demote_thread, NULL); ++ ++cleanup: ++ clear_bricklist(&bricklist_cold); ++ clear_bricklist(&bricklist_hot); ++ return ret; ++} ++ ++int32_t ++tier_migration_needed(xlator_t *this) ++{ ++ gf_defrag_info_t *defrag = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = 0; ++ ++ conf = this->private; ++ ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out); ++ ++ defrag = conf->defrag; ++ ++ if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) || ++ (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) ++ ret = 1; ++out: ++ return ret; ++} ++ ++int32_t ++tier_migration_get_dst(xlator_t *this, dht_local_t *local) ++{ ++ dht_conf_t *conf = NULL; ++ int32_t ret = -1; ++ gf_defrag_info_t *defrag = NULL; ++ ++ GF_VALIDATE_OR_GOTO("tier", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, this->private, out); ++ ++ conf = this->private; ++ ++ defrag = conf->defrag; ++ ++ if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) { ++ local->rebalance.target_node = conf->subvolumes[0]; ++ ++ } else if (conf->subvolumes[0] == local->cached_subvol) ++ local->rebalance.target_node = conf->subvolumes[1]; ++ else ++ local->rebalance.target_node = conf->subvolumes[0]; ++ ++ if (local->rebalance.target_node) ++ ret = 0; ++ ++out: ++ return ret; ++} ++ ++xlator_t * ++tier_search(xlator_t *this, dht_layout_t *layout, const char *name) ++{ ++ xlator_t *subvol = NULL; ++ dht_conf_t *conf = NULL; ++ ++ GF_VALIDATE_OR_GOTO("tier", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, this->private, out); ++ ++ conf = this->private; ++ ++ subvol = TIER_HASHED_SUBVOL; ++ ++out: ++ return subvol; ++} ++ ++static int ++tier_load_externals(xlator_t *this) ++{ ++ int ret = -1; ++ char *libpathfull = (LIBDIR "/libgfdb.so.0"); ++ get_gfdb_methods_t get_gfdb_methods; ++ ++ GF_VALIDATE_OR_GOTO("this", this, out); ++ ++ libhandle = dlopen(libpathfull, RTLD_NOW); ++ if (!libhandle) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Error loading libgfdb.so %s\n", dlerror()); ++ ret = -1; ++ goto out; ++ } ++ ++ get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods"); ++ if (!get_gfdb_methods) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Error loading get_gfdb_methods()"); ++ ret = -1; ++ goto out; ++ } ++ ++ get_gfdb_methods(&gfdb_methods); ++ ++ ret = 0; ++ ++out: ++ if (ret && libhandle) ++ dlclose(libhandle); ++ ++ return ret; ++} ++ ++static tier_mode_t ++tier_validate_mode(char *mode) ++{ ++ int ret = -1; ++ ++ if (strcmp(mode, "test") == 0) { ++ ret = TIER_MODE_TEST; ++ } else { ++ ret = TIER_MODE_WM; ++ } ++ ++ return ret; ++} ++ ++static gf_boolean_t ++tier_validate_compact_mode(char *mode) ++{ ++ gf_boolean_t ret = _gf_false; ++ ++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "tier_validate_compact_mode: mode = %s", mode); ++ ++ if (!strcmp(mode, "on")) { ++ ret = _gf_true; ++ } else { ++ ret = _gf_false; ++ } ++ ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "tier_validate_compact_mode: ret = %i", ret); ++ ++ return ret; ++} ++ ++int ++tier_init_methods(xlator_t *this) ++{ ++ int ret = -1; ++ dht_conf_t *conf = NULL; ++ dht_methods_t *methods = NULL; ++ ++ GF_VALIDATE_OR_GOTO("tier", this, err); ++ ++ conf = this->private; ++ ++ methods = &(conf->methods); ++ ++ methods->migration_get_dst_subvol = tier_migration_get_dst; ++ methods->migration_other = tier_start; ++ methods->migration_needed = tier_migration_needed; ++ 
methods->layout_search = tier_search; ++ ++ ret = 0; ++err: ++ return ret; ++} ++ ++static void ++tier_save_vol_name(xlator_t *this) ++{ ++ dht_conf_t *conf = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ char *suffix = NULL; ++ int name_len = 0; ++ ++ conf = this->private; ++ defrag = conf->defrag; ++ ++ suffix = strstr(this->name, "-tier-dht"); ++ ++ if (suffix) ++ name_len = suffix - this->name; ++ else ++ name_len = strlen(this->name); ++ ++ if (name_len > GD_VOLUME_NAME_MAX) ++ name_len = GD_VOLUME_NAME_MAX; ++ ++ strncpy(defrag->tier_conf.volname, this->name, name_len); ++ defrag->tier_conf.volname[name_len] = 0; ++} ++ ++int ++tier_init(xlator_t *this) ++{ ++ int ret = -1; ++ int freq = 0; ++ int maxsize = 0; ++ dht_conf_t *conf = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ char *voldir = NULL; ++ char *mode = NULL; ++ char *paused = NULL; ++ tier_mode_t tier_mode = DEFAULT_TIER_MODE; ++ gf_boolean_t compact_mode = _gf_false; ++ ++ ret = dht_init(this); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "tier_init failed"); ++ goto out; ++ } ++ ++ conf = this->private; ++ ++ ret = tier_init_methods(this); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "tier_init_methods failed"); ++ goto out; ++ } ++ ++ if (conf->subvolume_cnt != 2) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Invalid number of subvolumes %d", conf->subvolume_cnt); ++ goto out; ++ } ++ ++ /* if instatiated from client side initialization is complete. */ ++ if (!conf->defrag) { ++ ret = 0; ++ goto out; ++ } ++ ++ /* if instatiated from server side, load db libraries */ ++ ret = tier_load_externals(this); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Could not load externals. 
Aborting"); ++ goto out; ++ } ++ ++ defrag = conf->defrag; ++ ++ defrag->tier_conf.last_demote_qfile_index = 0; ++ defrag->tier_conf.last_promote_qfile_index = 0; ++ ++ defrag->tier_conf.is_tier = 1; ++ defrag->this = this; ++ ++ ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize); ++ if (ret) { ++ maxsize = 0; ++ } ++ ++ defrag->tier_conf.tier_max_promote_size = maxsize; ++ ++ ret = dict_get_int32(this->options, "tier-promote-frequency", &freq); ++ if (ret) { ++ freq = DEFAULT_PROMOTE_FREQ_SEC; ++ } ++ ++ defrag->tier_conf.tier_promote_frequency = freq; ++ ++ ret = dict_get_int32(this->options, "tier-demote-frequency", &freq); ++ if (ret) { ++ freq = DEFAULT_DEMOTE_FREQ_SEC; ++ } ++ ++ defrag->tier_conf.tier_demote_frequency = freq; ++ ++ ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq); ++ if (ret) { ++ freq = DEFAULT_HOT_COMPACT_FREQ_SEC; ++ } ++ ++ defrag->tier_conf.tier_compact_hot_frequency = freq; ++ ++ ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq); ++ if (ret) { ++ freq = DEFAULT_COLD_COMPACT_FREQ_SEC; ++ } ++ ++ defrag->tier_conf.tier_compact_cold_frequency = freq; ++ ++ ret = dict_get_int32(this->options, "watermark-hi", &freq); ++ if (ret) { ++ freq = DEFAULT_WM_HI; ++ } ++ ++ defrag->tier_conf.watermark_hi = freq; ++ ++ ret = dict_get_int32(this->options, "watermark-low", &freq); ++ if (ret) { ++ freq = DEFAULT_WM_LOW; ++ } ++ ++ defrag->tier_conf.watermark_low = freq; ++ ++ ret = dict_get_int32(this->options, "write-freq-threshold", &freq); ++ if (ret) { ++ freq = DEFAULT_WRITE_FREQ_SEC; ++ } ++ ++ defrag->write_freq_threshold = freq; ++ ++ ret = dict_get_int32(this->options, "read-freq-threshold", &freq); ++ if (ret) { ++ freq = DEFAULT_READ_FREQ_SEC; ++ } ++ ++ defrag->read_freq_threshold = freq; ++ ++ ret = dict_get_int32(this->options, "tier-max-mb", &freq); ++ if (ret) { ++ freq = DEFAULT_TIER_MAX_MIGRATE_MB; ++ } ++ ++ defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024; ++ ++ ret = dict_get_int32(this->options, "tier-max-files", &freq); ++ if (ret) { ++ freq = DEFAULT_TIER_MAX_MIGRATE_FILES; ++ } ++ ++ defrag->tier_conf.max_migrate_files = freq; ++ ++ ret = dict_get_int32(this->options, "tier-query-limit", ++ &(defrag->tier_conf.query_limit)); ++ if (ret) { ++ defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT; ++ } ++ ++ ret = dict_get_str(this->options, "tier-compact", &mode); ++ ++ if (ret) { ++ defrag->tier_conf.compact_active = DEFAULT_COMP_MODE; ++ } else { ++ compact_mode = tier_validate_compact_mode(mode); ++ /* If compaction is now active, we need to inform the bricks on ++ the hot and cold tier of this. See dht-common.h for more. 
*/ ++ defrag->tier_conf.compact_active = compact_mode; ++ if (compact_mode) { ++ defrag->tier_conf.compact_mode_switched_hot = _gf_true; ++ defrag->tier_conf.compact_mode_switched_cold = _gf_true; ++ } ++ } ++ ++ ret = dict_get_str(this->options, "tier-mode", &mode); ++ if (ret) { ++ defrag->tier_conf.mode = DEFAULT_TIER_MODE; ++ } else { ++ tier_mode = tier_validate_mode(mode); ++ defrag->tier_conf.mode = tier_mode; ++ } ++ ++ pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0); ++ ++ gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); ++ ++ ret = dict_get_str(this->options, "tier-pause", &paused); ++ ++ if (paused && strcmp(paused, "on") == 0) ++ gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); ++ ++ ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name); ++ if (ret < 0) ++ goto out; ++ ++ ret = mkdir_p(voldir, 0777, _gf_true); ++ if (ret == -1 && errno != EEXIST) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "tier_init failed"); ++ ++ GF_FREE(voldir); ++ goto out; ++ } ++ ++ GF_FREE(voldir); ++ ++ ret = gf_asprintf(&promotion_qfile, "%s/%s/promote", ++ DEFAULT_VAR_RUN_DIRECTORY, this->name); ++ if (ret < 0) ++ goto out; ++ ++ ret = gf_asprintf(&demotion_qfile, "%s/%s/demote", ++ DEFAULT_VAR_RUN_DIRECTORY, this->name); ++ if (ret < 0) { ++ GF_FREE(promotion_qfile); ++ goto out; ++ } ++ ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Promote/demote frequency %d/%d " ++ "Write/Read freq thresholds %d/%d", ++ defrag->tier_conf.tier_promote_frequency, ++ defrag->tier_conf.tier_demote_frequency, ++ defrag->write_freq_threshold, defrag->read_freq_threshold); ++ ++ tier_save_vol_name(this); ++ ++ ret = 0; ++ ++out: ++ ++ return ret; ++} ++ ++int ++tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data) ++{ ++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, ++ "Migrate file paused with op_ret %d", op_ret); ++ ++ return op_ret; ++} ++ ++int ++tier_cli_pause(void *data) ++{ ++ gf_defrag_info_t *defrag = NULL; ++ xlator_t *this = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = -1; ++ ++ this = data; ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, exit); ++ ++ defrag = conf->defrag; ++ GF_VALIDATE_OR_GOTO(this->name, defrag, exit); ++ ++ gf_defrag_pause_tier(this, defrag); ++ ++ ret = 0; ++exit: ++ return ret; ++} ++ ++int ++tier_reconfigure(xlator_t *this, dict_t *options) ++{ ++ dht_conf_t *conf = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ char *mode = NULL; ++ int migrate_mb = 0; ++ gf_boolean_t req_pause = _gf_false; ++ int ret = 0; ++ call_frame_t *frame = NULL; ++ gf_boolean_t last_compact_setting = _gf_false; ++ ++ conf = this->private; ++ ++ if (conf->defrag) { ++ defrag = conf->defrag; ++ GF_OPTION_RECONF("tier-max-promote-file-size", ++ defrag->tier_conf.tier_max_promote_size, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("tier-promote-frequency", ++ defrag->tier_conf.tier_promote_frequency, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("tier-demote-frequency", ++ defrag->tier_conf.tier_demote_frequency, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low, ++ options, int32, out); ++ ++ last_compact_setting = 
defrag->tier_conf.compact_active; ++ ++ GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active, ++ options, bool, out); ++ ++ if (last_compact_setting != defrag->tier_conf.compact_active) { ++ defrag->tier_conf.compact_mode_switched_hot = _gf_true; ++ defrag->tier_conf.compact_mode_switched_cold = _gf_true; ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "compact mode switched"); ++ } ++ ++ GF_OPTION_RECONF("tier-hot-compact-frequency", ++ defrag->tier_conf.tier_compact_hot_frequency, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("tier-cold-compact-frequency", ++ defrag->tier_conf.tier_compact_cold_frequency, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("tier-mode", mode, options, str, out); ++ defrag->tier_conf.mode = tier_validate_mode(mode); ++ ++ GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out); ++ defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 * ++ 1024; ++ ++ GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out); ++ ++ if (req_pause == _gf_true) { ++ frame = create_frame(this, this->ctx->pool); ++ if (!frame) ++ goto out; ++ ++ frame->root->pid = GF_CLIENT_PID_DEFRAG; ++ ++ ret = synctask_new(this->ctx->env, tier_cli_pause, ++ tier_cli_pause_done, frame, this); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "pause tier failed on reconfigure"); ++ } ++ } else { ++ ret = gf_defrag_resume_tier(this, defrag); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "resume tier failed on reconfigure"); ++ } ++ } ++ } ++ ++out: ++ return dht_reconfigure(this, options); ++} ++ ++void ++tier_fini(xlator_t *this) ++{ ++ if (libhandle) ++ dlclose(libhandle); ++ ++ GF_FREE(demotion_qfile); ++ GF_FREE(promotion_qfile); ++ ++ dht_fini(this); ++} ++ ++struct xlator_fops fops = { ++ ++ .lookup = dht_lookup, ++ .create = tier_create, ++ .mknod = dht_mknod, ++ ++ .open = dht_open, ++ .statfs = tier_statfs, ++ .opendir = dht_opendir, ++ .readdir = tier_readdir, ++ .readdirp = tier_readdirp, ++ .fsyncdir = dht_fsyncdir, ++ .symlink = dht_symlink, ++ .unlink = tier_unlink, ++ .link = tier_link, ++ .mkdir = dht_mkdir, ++ .rmdir = dht_rmdir, ++ .rename = dht_rename, ++ .entrylk = dht_entrylk, ++ .fentrylk = dht_fentrylk, ++ ++ /* Inode read operations */ ++ .stat = dht_stat, ++ .fstat = dht_fstat, ++ .access = dht_access, ++ .readlink = dht_readlink, ++ .getxattr = dht_getxattr, ++ .fgetxattr = dht_fgetxattr, ++ .readv = dht_readv, ++ .flush = dht_flush, ++ .fsync = dht_fsync, ++ .inodelk = dht_inodelk, ++ .finodelk = dht_finodelk, ++ .lk = dht_lk, ++ ++ /* Inode write operations */ ++ .fremovexattr = dht_fremovexattr, ++ .removexattr = dht_removexattr, ++ .setxattr = dht_setxattr, ++ .fsetxattr = dht_fsetxattr, ++ .truncate = dht_truncate, ++ .ftruncate = dht_ftruncate, ++ .writev = dht_writev, ++ .xattrop = dht_xattrop, ++ .fxattrop = dht_fxattrop, ++ .setattr = dht_setattr, ++ .fsetattr = dht_fsetattr, ++ .fallocate = dht_fallocate, ++ .discard = dht_discard, ++ .zerofill = dht_zerofill, ++}; ++ ++struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget}; ++ ++extern int32_t ++mem_acct_init(xlator_t *this); ++ ++extern struct volume_options dht_options[]; ++ ++xlator_api_t xlator_api = { ++ .init = tier_init, ++ .fini = tier_fini, ++ .notify = dht_notify, 
++ .reconfigure = tier_reconfigure, ++ .mem_acct_init = mem_acct_init, ++ .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */ ++ .fops = &fops, ++ .cbks = &cbks, ++ .options = dht_options, ++ .identifier = "tier", ++ .category = GF_MAINTAINED, ++}; ++ +diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h +new file mode 100644 +index 0000000..a20b1db +--- /dev/null ++++ b/xlators/cluster/dht/src/tier.h +@@ -0,0 +1,110 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef _TIER_H_ ++#define _TIER_H_ ++ ++/******************************************************************************/ ++/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */ ++#include "dht-common.h" ++#include ++#include ++#include ++#include ++ ++/* ++ * Size of timer wheel. We would not promote or demote less ++ * frequently than this number. ++ */ ++#define TIMER_SECS 3600 ++ ++#include "gfdb_data_store.h" ++#include ++#include ++ ++#define PROMOTION_QFILE "promotequeryfile" ++#define DEMOTION_QFILE "demotequeryfile" ++ ++#define TIER_HASHED_SUBVOL conf->subvolumes[0] ++#define TIER_UNHASHED_SUBVOL conf->subvolumes[1] ++ ++#define GET_QFILE_PATH(is_promotion) \ ++ (is_promotion) ? promotion_qfile : demotion_qfile ++ ++typedef struct tier_qfile_array { ++ int *fd_array; ++ ssize_t array_size; ++ ssize_t next_index; ++ /* Indicate the number of exhuasted FDs*/ ++ ssize_t exhausted_count; ++} tier_qfile_array_t; ++ ++typedef struct _query_cbk_args { ++ xlator_t *this; ++ gf_defrag_info_t *defrag; ++ /* This is write */ ++ int query_fd; ++ int is_promotion; ++ int is_compaction; ++ /* This is for read */ ++ tier_qfile_array_t *qfile_array; ++} query_cbk_args_t; ++ ++int ++gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag); ++ ++typedef struct gfdb_brick_info { ++ gfdb_time_t *time_stamp; ++ gf_boolean_t _gfdb_promote; ++ query_cbk_args_t *_query_cbk_args; ++} gfdb_brick_info_t; ++ ++typedef struct brick_list { ++ xlator_t *xlator; ++ char *brick_db_path; ++ char brick_name[NAME_MAX]; ++ char qfile_path[PATH_MAX]; ++ struct list_head list; ++} tier_brick_list_t; ++ ++typedef struct _dm_thread_args { ++ xlator_t *this; ++ gf_defrag_info_t *defrag; ++ struct list_head *brick_list; ++ int freq_time; ++ int return_value; ++ int is_promotion; ++ int is_compaction; ++ gf_boolean_t is_hot_tier; ++} migration_args_t; ++ ++typedef enum tier_watermark_op_ { ++ TIER_WM_NONE = 0, ++ TIER_WM_LOW, ++ TIER_WM_HI, ++ TIER_WM_MID ++} tier_watermark_op_t; ++ ++#define DEFAULT_PROMOTE_FREQ_SEC 120 ++#define DEFAULT_DEMOTE_FREQ_SEC 120 ++#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800 ++#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800 ++#define DEFAULT_DEMOTE_DEGRADED 1 ++#define DEFAULT_WRITE_FREQ_SEC 0 ++#define DEFAULT_READ_FREQ_SEC 0 ++#define DEFAULT_WM_LOW 75 ++#define DEFAULT_WM_HI 90 ++#define DEFAULT_TIER_MODE TIER_MODE_TEST ++#define DEFAULT_COMP_MODE _gf_true ++#define DEFAULT_TIER_MAX_MIGRATE_MB 1000 ++#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000 ++#define DEFAULT_TIER_QUERY_LIMIT 100 ++ ++#endif +diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am +index 194634b..545c02b 100644 +--- a/xlators/features/Makefile.am ++++ b/xlators/features/Makefile.am +@@ 
-5,6 +5,6 @@ endif + SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ + compress changelog gfid-access snapview-client snapview-server trash \ + shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ +- utime ++ utime changetimerecorder + + CLEANFILES = +diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/changetimerecorder/Makefile.am +new file mode 100644 +index 0000000..a985f42 +--- /dev/null ++++ b/xlators/features/changetimerecorder/Makefile.am +@@ -0,0 +1,3 @@ ++SUBDIRS = src ++ ++CLEANFILES = +diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am +new file mode 100644 +index 0000000..620017e +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/Makefile.am +@@ -0,0 +1,26 @@ ++xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features ++ ++# changetimerecorder can only get build when libgfdb is enabled ++if BUILD_GFDB ++ xlator_LTLIBRARIES = changetimerecorder.la ++endif ++ ++changetimerecorder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) ++ ++changetimerecorder_la_SOURCES = changetimerecorder.c \ ++ ctr-helper.c ctr-xlator-ctx.c ++ ++changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ ++ $(top_builddir)/libglusterfs/src/gfdb/libgfdb.la ++ ++noinst_HEADERS = ctr-messages.h changetimerecorder.h ctr_mem_types.h \ ++ ctr-helper.h ctr-xlator-ctx.h ++ ++AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ ++ -I$(top_srcdir)/libglusterfs/src/gfdb \ ++ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ ++ -DDATADIR=\"$(localstatedir)\" ++ ++AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS) ++ ++CLEANFILES = +diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c +new file mode 100644 +index 0000000..f2aa4a9 +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/changetimerecorder.c +@@ -0,0 +1,2371 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++#include ++#include ++ ++#include "gfdb_sqlite3.h" ++#include "ctr-helper.h" ++#include "ctr-messages.h" ++#include ++ ++#include "changetimerecorder.h" ++#include "tier-ctr-interface.h" ++ ++/*******************************inode forget***********************************/ ++int ++ctr_forget(xlator_t *this, inode_t *inode) ++{ ++ fini_ctr_xlator_ctx(this, inode); ++ return 0; ++} ++ ++/************************** Look up heal **************************************/ ++/* ++Problem: The CTR xlator records file meta (heat/hardlinks) ++into the data. This works fine for files which are created ++after ctr xlator is switched ON. But for files which were ++created before CTR xlator is ON, CTR xlator is not able to ++record either of the meta i.e heat or hardlinks. Thus making ++those files immune to promotions/demotions. ++ ++Solution: The solution that is implemented in this patch is ++do ctr-db heal of all those pre-existent files, using named lookup. ++For this purpose we use the inode-xlator context variable option ++in gluster. ++The inode-xlator context variable for ctr xlator will have the ++following, ++ a. A Lock for the context variable ++ b. 
A hardlink list: This list represents the successfully looked ++ up hardlinks. ++These are the scenarios in which the hardlink list is updated: ++1) Named-Lookup: Whenever a named lookup happens on a file, in the ++ wind path we copy all required hardlink and inode information to the ++ ctr_db_record structure, which resides in the frame->local variable. ++ We don't update the database in the wind. During the unwind, we read the ++ information from the ctr_db_record and: ++ Check if the inode context variable is created; if not, we create it. ++ Check if the hard link is there in the hardlink list. ++ If it is not there, we add it to the list and send an update to the ++ database using libgfdb. ++ Please note: The database transaction can fail (and we ignore it) as there ++ might already be a record in the db. This update to the db is to heal ++ it if it is not there. ++ If it is there in the list, we ignore it. ++2) Inode Forget: Whenever an inode forget hits, we clear the hardlink list in ++ the inode context variable and delete the inode context variable. ++ Please note: An inode forget may happen for two reasons, ++ a. when the inode is deleted. ++ b. the in-memory inode is evicted from the inode table due to cache limits. ++3) create: whenever a create happens, we create the inode context variable and ++ add the hardlink. The database update is done as usual by ctr. ++4) link: whenever a hardlink is created for the inode, we create the inode ++ context variable, if not present, and add the hardlink to the list. ++5) unlink: whenever an unlink happens, we delete the hardlink from the list. ++6) mknod: same as create. ++7) rename: whenever a rename happens, we update the hardlink in the list. If ++ the hardlink was not present, we add it to the list. ++ ++What is pending: ++1) This solution will only work for named lookups. ++2) We don't track afr-self-heal/dht-rebalancer traffic for healing. 
++ ++*/ ++ ++/* This function does not write anything to the db, ++ * just created the local variable ++ * for the frame and sets values for the ctr_db_record */ ++static int ++ctr_lookup_wind(call_frame_t *frame, xlator_t *this, ++ gf_ctr_inode_context_t *ctr_inode_cx) ++{ ++ int ret = -1; ++ gf_ctr_private_t *_priv = NULL; ++ gf_ctr_local_t *ctr_local = NULL; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ GF_ASSERT(this); ++ IS_CTR_INODE_CX_SANE(ctr_inode_cx); ++ ++ _priv = this->private; ++ GF_ASSERT(_priv); ++ ++ if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) { ++ frame->local = init_ctr_local_t(this); ++ if (!frame->local) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, ++ "WIND: Error while creating ctr local"); ++ goto out; ++ }; ++ ctr_local = frame->local; ++ /*Definitely no internal fops will reach here*/ ++ ctr_local->is_internal_fop = _gf_false; ++ /*Don't record counters*/ ++ CTR_DB_REC(ctr_local).do_record_counters = _gf_false; ++ /*Don't record time at all*/ ++ CTR_DB_REC(ctr_local).do_record_times = _gf_false; ++ ++ /* Copy gfid into db record*/ ++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid)); ++ ++ /* Set fop_path and fop_type, required by libgfdb to make ++ * decision while inserting the record */ ++ CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path; ++ CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type; ++ ++ /* Copy hard link info*/ ++ gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid, ++ *((NEW_LINK_CX(ctr_inode_cx))->pargfid)); ++ if (snprintf(CTR_DB_REC(ctr_local).file_name, ++ sizeof(CTR_DB_REC(ctr_local).file_name), "%s", ++ NEW_LINK_CX(ctr_inode_cx)->basename) >= ++ sizeof(CTR_DB_REC(ctr_local).file_name)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, ++ "WIND: Error copying filename of ctr local"); ++ goto out; ++ } ++ /* Since we are in lookup we can ignore errors while ++ * Inserting in the DB, because there may be many ++ * to write to the DB attempts for healing. ++ * We don't want to log all failed attempts and ++ * bloat the log*/ ++ ctr_local->gfdb_db_record.ignore_errors = _gf_true; ++ } ++ ++ ret = 0; ++ ++out: ++ ++ if (ret) { ++ free_ctr_local(ctr_local); ++ frame->local = NULL; ++ } ++ ++ return ret; ++} ++ ++/* This function inserts the ctr_db_record populated by ctr_lookup_wind ++ * in to the db. 
It also destroys the frame->local created by ctr_lookup_wind */ ++static int ++ctr_lookup_unwind(call_frame_t *frame, xlator_t *this) ++{ ++ int ret = -1; ++ gf_ctr_private_t *_priv = NULL; ++ gf_ctr_local_t *ctr_local = NULL; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(this); ++ ++ _priv = this->private; ++ GF_ASSERT(_priv); ++ ++ GF_ASSERT(_priv->_db_conn); ++ ++ ctr_local = frame->local; ++ ++ if (ctr_local && (ctr_local->ia_inode_type != IA_IFDIR)) { ++ ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record); ++ if (ret == -1) { ++ gf_msg(this->name, ++ _gfdb_log_level(GF_LOG_ERROR, ++ ctr_local->gfdb_db_record.ignore_errors), ++ 0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, ++ "UNWIND: Error filling ctr local"); ++ goto out; ++ } ++ } ++ ret = 0; ++out: ++ free_ctr_local(ctr_local); ++ frame->local = NULL; ++ return ret; ++} ++ ++/****************************************************************************** ++ * ++ * FOPS HANDLING BELOW ++ * ++ * ***************************************************************************/ ++ ++/****************************LOOKUP********************************************/ ++ ++int32_t ++ctr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *dict, struct iatt *postparent) ++{ ++ int ret = -1; ++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; ++ gf_ctr_local_t *ctr_local = NULL; ++ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR; ++ gf_boolean_t _is_heal_needed = _gf_false; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /* if the lookup failed, don't do anything */ ++ if (op_ret == -1) { ++ gf_msg_trace(this->name, 0, "lookup failed with %s", ++ strerror(op_errno)); ++ goto out; ++ } ++ ++ /* Ignore directory lookups */ ++ if (inode->ia_type == IA_IFDIR) { ++ goto out; ++ } ++ ++ /* if frame->local was not set by ctr_lookup(), ++ * don't do anything */ ++ if (!frame->local) { ++ goto out; ++ } ++ ++ /* if the lookup is for a dht link, do not record */ ++ if (dht_is_linkfile(buf, dict)) { ++ gf_msg_trace(this->name, 0, ++ "Ignoring Lookup " ++ "for dht link file"); ++ goto out; ++ } ++ ++ ctr_local = frame->local; ++ /*Assign the proper inode type*/ ++ ctr_local->ia_inode_type = inode->ia_type; ++ ++ /* Copy gfid directly from inode */ ++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid); ++ ++ /* Checking if gfid and parent gfid is valid */ ++ if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) || ++ gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) { ++ gf_msg_trace(this->name, 0, "Invalid GFID"); ++ goto out; ++ } ++ ++ /* if it is a first entry, ++ * mark the ctr_record for create; ++ * a create will attempt to insert both a file and a hard link record in the db */ ++ ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode); ++ if (!ctr_xlator_ctx) { ++ /* This marks inode heal */ ++ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE; ++ _is_heal_needed = _gf_true; ++ } ++ ++ /* Copy the correct gfid from resolved inode */ ++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid); ++ ++ /* Add hard link to the list */ ++ ret_val = add_hard_link_ctx(frame, this, inode); ++ if (ret_val == CTR_CTX_ERROR) { ++ gf_msg_trace(this->name, 0, "Failed adding hardlink to list"); ++ goto out; ++ } ++ /* If inode needs healing then heal the hardlink also */ ++ else if (ret_val & CTR_TRY_INODE_HEAL) { ++ /* This marks inode heal */ ++ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE; ++ _is_heal_needed = _gf_true; ++ } ++ /* If hardlink needs healing */ ++ else if (ret_val & 
++ _is_heal_needed = _gf_true;
++ }
++
++ /* If no heal is needed, we are done */
++ if (!_is_heal_needed)
++ goto out;
++
++ /* FINALLY HEAL: insert the ctr_db_record populated by ctr_lookup_wind
++ * into the db. This also destroys the frame->local
++ * created by ctr_lookup_wind */
++ ret = ctr_lookup_unwind(frame, this);
++ if (ret) {
++ gf_msg_trace(this->name, 0, "Failed healing/inserting link");
++ }
++
++out:
++ free_ctr_local((gf_ctr_local_t *)frame->local);
++ frame->local = NULL;
++
++ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict,
++ postparent);
++
++ return 0;
++}
++
++int32_t
++ctr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
++{
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++ gf_ctr_link_context_t ctr_link_cx;
++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ GF_ASSERT(frame);
++ GF_ASSERT(frame->root);
++
++ /* Don't handle nameless lookups */
++ if (!loc->parent || !loc->name)
++ goto out;
++
++ /* Fill ctr link context */
++ FILL_CTR_LINK_CX(_link_cx, loc->parent->gfid, loc->name, out);
++
++ /* Fill ctr inode context */
++ /* IA_IFREG: in the wind we assume it is a regular file; only in
++ * the unwind do we know whether the inode is a file or a directory.
++ * gfid: here we only fill in loc->gfid, which may not be correct;
++ * the unwind fills in the correct gfid on a successful lookup */
++ FILL_CTR_INODE_CONTEXT(_inode_cx, IA_IFREG, loc->gfid, _link_cx, NULL,
++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
++
++ /* Create the frame->local and populate the ctr_db_record.
++ * No writing to the db yet */
++ ret = ctr_lookup_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
++ "Failed to insert link wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_lookup_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->lookup, loc, xdata);
++ return 0;
++}
++
++/****************************WRITEV********************************************/
++int32_t
++ctr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
++ struct iatt *postbuf, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_UNWIND_FAILED,
++ "Failed to insert writev unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
++ xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
++ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
++ dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ /* Fill ctr inode context */
++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /* Record into the database */
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
++ "Failed to
insert writev wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_writev_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags, ++ iobref, xdata); ++ ++ return 0; ++} ++ ++/******************************setattr*****************************************/ ++ ++int32_t ++ctr_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf, ++ struct iatt *postop_stbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, ++ "Failed to insert setattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop_stbuf, ++ postop_stbuf, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, ++ int32_t valid, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ "Failed to insert setattr wind"); ++ } ++out: ++ ++ STACK_WIND(frame, ctr_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ ++ return 0; ++} ++ ++/*************************** fsetattr ***************************************/ ++int32_t ++ctr_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf, ++ struct iatt *postop_stbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, ++ "Failed to insert fsetattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, preop_stbuf, ++ postop_stbuf, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, ++ int32_t valid, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ "Failed to insert fsetattr wind"); ++ } ++out: ++ STACK_WIND(frame, ctr_fsetattr_cbk, FIRST_CHILD(this), ++ 
FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ ++ return 0; ++} ++/****************************fremovexattr************************************/ ++ ++int32_t ++ctr_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED, ++ "Failed to insert fremovexattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED, ++ "Failed to insert fremovexattr wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_fremovexattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); ++ return 0; ++} ++ ++/****************************removexattr*************************************/ ++ ++int32_t ++ctr_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED, ++ "Failed to insert removexattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED, ++ "Failed to insert removexattr wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_removexattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); ++ return 0; ++} ++ ++/****************************truncate****************************************/ ++ ++int32_t ++ctr_truncate_cbk(call_frame_t *frame, void 
*cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED, ++ "Failed to insert truncate unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, ++ xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, ++ "Failed to insert truncate wind"); ++ } ++out: ++ STACK_WIND(frame, ctr_truncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); ++ return 0; ++} ++ ++/****************************ftruncate***************************************/ ++ ++int32_t ++ctr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED, ++ "Failed to insert ftruncate unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, ++ xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, ++ "Failed to insert ftruncate wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_ftruncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); ++ return 0; ++} ++ ++/****************************rename******************************************/ ++int32_t ++ctr_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ uint32_t remaining_links = -1; ++ gf_ctr_local_t *ctr_local = NULL; ++ gfdb_fop_type_t fop_type = 
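++ /* Sketch of the link-count handshake handled below (assuming posix
++ * honours the link-count request set up in the wind path):
++ *   remaining_links > 1  -> GFDB_FOP_UNDEL      one name removed
++ *   remaining_links == 1 -> GFDB_FOP_UNDEL_ALL  last name removed
++ * The records deleted belong to the overwritten target inode whose
++ * gfid ctr_rename() stashed in old_gfid. */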
GFDB_FOP_INVALID_OP; ++ gfdb_fop_path_t fop_path = GFDB_FOP_INVALID; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(this); ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED, ++ "Failed to insert rename unwind"); ++ goto out; ++ } ++ ++ if (!xdata) ++ goto out; ++ /* ++ * ++ * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator ++ * This is only set when we are overwriting hardlinks. ++ * ++ * */ ++ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, ++ &remaining_links); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA"); ++ remaining_links = -1; ++ goto out; ++ } ++ ++ ctr_local = frame->local; ++ if (!ctr_local) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_NULL_LOCAL, ++ "ctr_local is NULL."); ++ goto out; ++ } ++ ++ /* This is not the only link */ ++ if (remaining_links > 1) { ++ fop_type = GFDB_FOP_DENTRY_WRITE; ++ fop_path = GFDB_FOP_UNDEL; ++ } ++ /* Last link that was deleted */ ++ else if (remaining_links == 1) { ++ fop_type = GFDB_FOP_DENTRY_WRITE; ++ fop_path = GFDB_FOP_UNDEL_ALL; ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED, ++ "Invalid link count from posix"); ++ goto out; ++ } ++ ++ ret = ctr_delete_hard_link_from_db( ++ this, CTR_DB_REC(ctr_local).old_gfid, CTR_DB_REC(ctr_local).pargfid, ++ CTR_DB_REC(ctr_local).file_name, fop_type, fop_path); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, ++ "Failed to delete records of %s", ++ CTR_DB_REC(ctr_local).old_file_name); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent, ++ postoldparent, prenewparent, postnewparent, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t new_link_cx, old_link_cx; ++ gf_ctr_link_context_t *_nlink_cx = &new_link_cx; ++ gf_ctr_link_context_t *_olink_cx = &old_link_cx; ++ int is_dict_created = 0; ++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill old link context*/ ++ FILL_CTR_LINK_CX(_olink_cx, oldloc->pargfid, oldloc->name, out); ++ ++ /*Fill new link context*/ ++ FILL_CTR_LINK_CX(_nlink_cx, newloc->pargfid, newloc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type, ++ oldloc->inode->gfid, _nlink_cx, _olink_cx, ++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); ++ ++ /* If the rename is a overwrite of hardlink ++ * rename ("file1", "file2") ++ * file1 is hardlink for gfid say 00000000-0000-0000-0000-00000000000A ++ * file2 is hardlink for gfid say 00000000-0000-0000-0000-00000000000B ++ * so we are saving file2 gfid in old_gfid so that we delete entries ++ * from the db during rename callback if the fop is successful ++ * */ ++ if (newloc->inode) { ++ /* This is the GFID from where the newloc hardlink will be ++ * unlinked */ ++ _inode_cx->old_gfid = &newloc->inode->gfid; ++ } ++ ++ /* Is a metatdata fop */ ++ _inode_cx->is_metadata_fop = 
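++ /* Worked example: rename("f1", "f2") where "f2" already exists as
++ * a name of another inode B. B loses the name "f2", so the unwind
++ * path must delete that hard-link record (and, if it was B's last
++ * name, all of B's records) -- hence old_gfid above. */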
_gf_true; ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_WIND_FAILED, ++ "Failed to insert rename wind"); ++ } else { ++ /* We are doing updation of hard link in inode context in wind ++ * As we don't get the "inode" in the call back for rename */ ++ ret = update_hard_link_ctx(frame, this, oldloc->inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED, ++ "Failed " ++ "updating hard link in ctr inode context"); ++ goto out; ++ } ++ ++ /* If the newloc has an inode. i.e acquiring hardlink of an ++ * exisitng file i.e overwritting a file. ++ * */ ++ if (newloc->inode) { ++ /* Getting the ctr inode context variable for ++ * inode whose hardlink will be acquired during ++ * the rename ++ * */ ++ ctr_xlator_ctx = get_ctr_xlator_ctx(this, newloc->inode); ++ if (!ctr_xlator_ctx) { ++ /* Since there is no ctr inode context ++ * so nothing more to do */ ++ ret = 0; ++ goto out; ++ } ++ ++ /* Deleting hardlink from context variable */ ++ ret = ctr_delete_hard_link(this, ctr_xlator_ctx, newloc->pargfid, ++ newloc->name); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_DELETE_HARDLINK_FAILED, ++ "Failed to delete hard link"); ++ goto out; ++ } ++ ++ /* Requesting for number of hardlinks on the newloc ++ * inode from POSIX. ++ * */ ++ is_dict_created = set_posix_link_request(this, &xdata); ++ if (is_dict_created == -1) { ++ ret = -1; ++ goto out; ++ } ++ } ++ } ++ ++out: ++ STACK_WIND(frame, ctr_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ ++ if (is_dict_created == 1) { ++ dict_unref(xdata); ++ } ++ ++ return 0; ++} ++ ++/****************************unlink******************************************/ ++int32_t ++ctr_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ uint32_t remaining_links = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ if (!xdata) ++ goto out; ++ ++ /* ++ * ++ * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator ++ * ++ * */ ++ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, ++ &remaining_links); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA"); ++ remaining_links = -1; ++ } ++ ++ /*This is not the only link*/ ++ if (remaining_links != 1) { ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_UNDEL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, ++ "Failed to insert unlink unwind"); ++ } ++ } ++ /*Last link that was deleted*/ ++ else if (remaining_links == 1) { ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_UNDEL_ALL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, ++ "Failed to insert unlink unwind"); ++ } ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, ++ xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t 
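++ /* ctr_unlink needs the inode's remaining name count at unwind
++ * time, so further down it sets GF_REQUEST_LINK_COUNT_XDATA in
++ * xdata and posix is expected to reply with
++ * GF_RESPONSE_LINK_COUNT_XDATA (consumed in ctr_unlink_cbk above). */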
ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ gf_boolean_t is_xdata_created = _gf_false; ++ struct iatt dummy_stat = {0}; ++ ++ GF_ASSERT(frame); ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill link context*/ ++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ _link_cx, NULL, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_WDEL); ++ ++ /*Internal FOP*/ ++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); ++ ++ /* Is a metadata FOP */ ++ _inode_cx->is_metadata_fop = _gf_true; ++ ++ /* If its a internal FOP and dht link file donot record*/ ++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { ++ goto out; ++ } ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, ++ "Failed to insert unlink wind"); ++ } else { ++ /* We are doing delete of hard link in inode context in wind ++ * As we don't get the "inode" in the call back for rename */ ++ ret = delete_hard_link_ctx(frame, this, loc->inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED, ++ "Failed " ++ "deleting hard link from ctr inode context"); ++ } ++ } ++ ++ /* ++ * ++ * Sending GF_REQUEST_LINK_COUNT_XDATA ++ * to POSIX Xlator to send link count in unwind path ++ * ++ * */ ++ /*create xdata if NULL*/ ++ if (!xdata) { ++ xdata = dict_new(); ++ is_xdata_created = (xdata) ? _gf_true : _gf_false; ++ } ++ if (!xdata) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL, ++ "xdata is NULL :Cannot send " ++ "GF_REQUEST_LINK_COUNT_XDATA to posix"); ++ goto out; ++ } ++ ++ ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ "Failed setting GF_REQUEST_LINK_COUNT_XDATA"); ++ if (is_xdata_created) { ++ dict_unref(xdata); ++ } ++ goto out; ++ } ++ ++out: ++ STACK_WIND(frame, ctr_unlink_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ ++ if (is_xdata_created) ++ dict_unref(xdata); ++ ++ return 0; ++} ++ ++/****************************fsync******************************************/ ++int32_t ++ctr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, ++ "Failed to insert fsync unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, 
_inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_WIND_FAILED, ++ "Failed to insert fsync wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_fsync_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); ++ return 0; ++} ++ ++/****************************setxattr****************************************/ ++ ++int ++ctr_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, ++ "Failed to insert setxattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); ++ ++ return 0; ++} ++ ++int ++ctr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, ++ int flags, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ "Failed to insert setxattr wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_setxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata); ++ return 0; ++} ++/**************************** fsetxattr *************************************/ ++int32_t ++ctr_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, ++ "Failed to insert fsetxattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, ++ int32_t flags, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ "Failed to insert fsetxattr wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_fsetxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); ++ return 0; ++} ++/****************************mknod*******************************************/ ++ ++int32_t 
++ctr_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, struct iatt *buf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ /* Add hard link to the list */ ++ ret_val = add_hard_link_ctx(frame, this, inode); ++ if (ret_val == CTR_CTX_ERROR) { ++ gf_msg_trace(this->name, 0, "Failed adding hard link"); ++ } ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE, ++ GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED, ++ "Failed to insert mknod unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); ++ ++ return 0; ++} ++ ++int ++ctr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, ++ dev_t rdev, mode_t umask, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ uuid_t gfid = { ++ 0, ++ }; ++ uuid_t *ptr_gfid = &gfid; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ ++ /*get gfid from xdata dict*/ ++ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "failed to get gfid from dict"); ++ goto out; ++ } ++ ++ /*fill ctr link context*/ ++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx, ++ NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_WIND_FAILED, ++ "Failed to insert mknod wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_mknod_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); ++ return 0; ++} ++ ++/****************************create******************************************/ ++int ++ctr_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = add_hard_link_ctx(frame, this, inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_ADD_HARDLINK_FAILED, ++ "Failed adding hard link"); ++ } ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE, ++ GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, ++ "Failed to insert create unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ ++ return 0; ++} ++ ++int ++ctr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ 
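++ /* In the wind path of create/mknod the inode does not yet have a
++ * gfid assigned, so the gfid proposed by the client is read from
++ * the "gfid-req" key in xdata and recorded instead. */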
gf_ctr_link_context_t ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ uuid_t gfid = { ++ 0, ++ }; ++ uuid_t *ptr_gfid = &gfid; ++ struct iatt dummy_stat = {0}; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ ++ /*Get GFID from Xdata dict*/ ++ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_GET_GFID_FROM_DICT_FAILED, ++ "failed to get gfid from dict"); ++ goto out; ++ } ++ ++ /*fill ctr link context*/ ++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx, ++ NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); ++ ++ /*Internal FOP*/ ++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); ++ ++ /* If its a internal FOP and dht link file donot record*/ ++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { ++ goto out; ++ } ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, &ctr_inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_WIND_FAILED, ++ "Failed to insert create wind"); ++ } ++out: ++ STACK_WIND(frame, ctr_create_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, ++ xdata); ++ return 0; ++} ++ ++/****************************link********************************************/ ++ ++int ++ctr_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ /* Add hard link to the list */ ++ ret = add_hard_link_ctx(frame, this, inode); ++ if (ret) { ++ gf_msg_trace(this->name, 0, "Failed adding hard link"); ++ } ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, ++ "Failed to insert create unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent, ++ postparent, xdata); ++ return 0; ++} ++ ++int ++ctr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ struct iatt dummy_stat = {0}; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ ++ /*fill ctr link context*/ ++ FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type, ++ oldloc->inode->gfid, _link_cx, NULL, ++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); ++ ++ /*Internal FOP*/ ++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); ++ ++ /* Is a metadata fop */ ++ _inode_cx->is_metadata_fop = _gf_true; ++ ++ /* If its a internal FOP and dht link file donot record*/ ++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { ++ goto out; ++ } ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED, ++ "Failed to 
insert link wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_link_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); ++ return 0; ++} ++ ++/******************************readv*****************************************/ ++int ++ctr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, struct iovec *vector, int count, struct iatt *stbuf, ++ struct iobref *iobref, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, ++ "Failed to insert create unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf, ++ iobref, xdata); ++ return 0; ++} ++ ++int ++ctr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, ++ uint32_t flags, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_READ, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_READV_WIND_FAILED, ++ "Failed to insert readv wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, off, flags, xdata); ++ return 0; ++} ++ ++/*******************************ctr_ipc****************************************/ ++ ++/*This is the call back function per record/file from data base*/ ++static int ++ctr_db_query_callback(gfdb_query_record_t *gfdb_query_record, void *args) ++{ ++ int ret = -1; ++ ctr_query_cbk_args_t *query_cbk_args = args; ++ ++ GF_VALIDATE_OR_GOTO("ctr", query_cbk_args, out); ++ ++ ret = gfdb_write_query_record(query_cbk_args->query_fd, gfdb_query_record); ++ if (ret) { ++ gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "Failed to write to query file"); ++ goto out; ++ } ++ ++ query_cbk_args->count++; ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++/* This function does all the db queries related to tiering and ++ * generates/populates new/existing query file ++ * inputs: ++ * xlator_t *this : CTR Translator ++ * void *conn_node : Database connection ++ * char *query_file: the query file that needs to be updated ++ * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters ++ * Return: ++ * On success 0 ++ * On failure -1 ++ * */ ++int ++ctr_db_query(xlator_t *this, void *conn_node, char *query_file, ++ gfdb_ipc_ctr_params_t *ipc_ctr_params) ++{ ++ int ret = -1; ++ ctr_query_cbk_args_t query_cbk_args = {0}; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, conn_node, out); ++ GF_VALIDATE_OR_GOTO(this->name, query_file, out); ++ GF_VALIDATE_OR_GOTO(this->name, ipc_ctr_params, out); ++ ++ /*Query for eligible files from db*/ ++ query_cbk_args.query_fd = open(query_file, O_WRONLY | O_CREAT | O_APPEND, ++ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); ++ if (query_cbk_args.query_fd < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, CTR_MSG_FATAL_ERROR, ++ "Failed to open query file %s", query_file); ++ goto 
out; ++ } ++ if (!ipc_ctr_params->is_promote) { ++ if (ipc_ctr_params->emergency_demote) { ++ /* emergency demotion mode */ ++ ret = find_all(conn_node, ctr_db_query_callback, ++ (void *)&query_cbk_args, ++ ipc_ctr_params->query_limit); ++ } else { ++ if (ipc_ctr_params->write_freq_threshold == 0 && ++ ipc_ctr_params->read_freq_threshold == 0) { ++ ret = find_unchanged_for_time(conn_node, ctr_db_query_callback, ++ (void *)&query_cbk_args, ++ &ipc_ctr_params->time_stamp); ++ } else { ++ ret = find_unchanged_for_time_freq( ++ conn_node, ctr_db_query_callback, (void *)&query_cbk_args, ++ &ipc_ctr_params->time_stamp, ++ ipc_ctr_params->write_freq_threshold, ++ ipc_ctr_params->read_freq_threshold, _gf_false); ++ } ++ } ++ } else { ++ if (ipc_ctr_params->write_freq_threshold == 0 && ++ ipc_ctr_params->read_freq_threshold == 0) { ++ ret = find_recently_changed_files(conn_node, ctr_db_query_callback, ++ (void *)&query_cbk_args, ++ &ipc_ctr_params->time_stamp); ++ } else { ++ ret = find_recently_changed_files_freq( ++ conn_node, ctr_db_query_callback, (void *)&query_cbk_args, ++ &ipc_ctr_params->time_stamp, ++ ipc_ctr_params->write_freq_threshold, ++ ipc_ctr_params->read_freq_threshold, _gf_false); ++ } ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: query from db failed"); ++ goto out; ++ } ++ ++ ret = clear_files_heat(conn_node); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed to clear db entries"); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ ++ if (!ret) ++ ret = query_cbk_args.count; ++ ++ if (query_cbk_args.query_fd >= 0) { ++ sys_close(query_cbk_args.query_fd); ++ query_cbk_args.query_fd = -1; ++ } ++ ++ return ret; ++} ++ ++void * ++ctr_compact_thread(void *args) ++{ ++ int ret = -1; ++ void *db_conn = NULL; ++ ++ xlator_t *this = NULL; ++ gf_ctr_private_t *priv = NULL; ++ gf_boolean_t compact_active = _gf_false; ++ gf_boolean_t compact_mode_switched = _gf_false; ++ ++ this = (xlator_t *)args; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ ++ priv = this->private; ++ ++ db_conn = priv->_db_conn; ++ compact_active = priv->compact_active; ++ compact_mode_switched = priv->compact_mode_switched; ++ ++ gf_msg("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET, "Starting compaction"); ++ ++ ret = compact_db(db_conn, compact_active, compact_mode_switched); ++ ++ if (ret) { ++ gf_msg("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to perform the compaction"); ++ } ++ ++ ret = pthread_mutex_lock(&priv->compact_lock); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to acquire lock"); ++ goto out; ++ } ++ ++ /* We are done compaction on this brick. 
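++ * (Both flags are only toggled under compact_lock, so clearing them
++ * lets the next GFDB_IPC_CTR_SET_COMPACT_PRAGMA request spawn a
++ * fresh compaction thread; see ctr_ipc_helper().)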
Set all flags to false */ ++ priv->compact_active = _gf_false; ++ priv->compact_mode_switched = _gf_false; ++ ++ ret = pthread_mutex_unlock(&priv->compact_lock); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to release lock"); ++ goto out; ++ } ++ ++out: ++ return NULL; ++} ++ ++int ++ctr_ipc_helper(xlator_t *this, dict_t *in_dict, dict_t *out_dict) ++{ ++ int ret = -1; ++ char *ctr_ipc_ops = NULL; ++ gf_ctr_private_t *priv = NULL; ++ char *db_version = NULL; ++ char *db_param_key = NULL; ++ char *db_param = NULL; ++ char *query_file = NULL; ++ gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; ++ int result = 0; ++ pthread_t compact_thread; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, this->private, out); ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv->_db_conn, out); ++ GF_VALIDATE_OR_GOTO(this->name, in_dict, out); ++ GF_VALIDATE_OR_GOTO(this->name, out_dict, out); ++ ++ GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY, ctr_ipc_ops, ++ out); ++ ++ /*if its a db clear operation */ ++ if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS, ++ SLEN(GFDB_IPC_CTR_CLEAR_OPS)) == 0) { ++ ret = clear_files_heat(priv->_db_conn); ++ if (ret) ++ goto out; ++ ++ } /* if its a query operation, in which case its query + clear db*/ ++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS, ++ SLEN(GFDB_IPC_CTR_QUERY_OPS)) == 0) { ++ ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_QFILE_PATH, &query_file); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed extracting query file path"); ++ goto out; ++ } ++ ++ ret = dict_get_bin(in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, ++ (void *)&ipc_ctr_params); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed extracting query parameters"); ++ goto out; ++ } ++ ++ ret = ctr_db_query(this, priv->_db_conn, query_file, ipc_ctr_params); ++ ++ ret = dict_set_int32(out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ret); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed setting query reply"); ++ goto out; ++ } ++ ++ } /* if its a query for db version */ ++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS, ++ SLEN(GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) { ++ ret = get_db_version(priv->_db_conn, &db_version); ++ if (ret == -1 || !db_version) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed extracting db version "); ++ goto out; ++ } ++ ++ SET_DB_PARAM_TO_DICT(this->name, out_dict, GFDB_IPC_CTR_RET_DB_VERSION, ++ db_version, ret, error); ++ ++ } /* if its a query for a db setting */ ++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS, ++ SLEN(GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) { ++ ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_DB_KEY, &db_param_key); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed extracting db param key"); ++ goto out; ++ } ++ ++ ret = get_db_params(priv->_db_conn, db_param_key, &db_param); ++ if (ret == -1 || !db_param) { ++ goto out; ++ } ++ ++ SET_DB_PARAM_TO_DICT(this->name, out_dict, db_param_key, db_param, ret, ++ error); ++ } /* if its an attempt to compact the database */ ++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ++ SLEN(GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) { ++ ret = pthread_mutex_lock(&priv->compact_lock); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to acquire lock for compaction"); ++ goto out; ++ } ++ ++ if ((priv->compact_active || priv->compact_mode_switched)) { ++ /* 
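++ * Only one compaction may run per brick at a time; the
++ * active/mode-switched flags double as that guard.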
Compaction in progress. LEAVE */ ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Compaction already in progress."); ++ pthread_mutex_unlock(&priv->compact_lock); ++ goto out; ++ } ++ /* At this point, we should be the only one on the brick */ ++ /* compacting */ ++ ++ /* Grab the arguments from the dictionary */ ++ ret = dict_get_int32(in_dict, "compact_active", &result); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to get compaction type"); ++ goto out; ++ } ++ ++ if (result) { ++ priv->compact_active = _gf_true; ++ } ++ ++ ret = dict_get_int32(in_dict, "compact_mode_switched", &result); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to see if compaction switched"); ++ goto out; ++ } ++ ++ if (result) { ++ priv->compact_mode_switched = _gf_true; ++ gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET, ++ "Pre-thread: Compact mode switch is true"); ++ } else { ++ gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET, ++ "Pre-thread: Compact mode switch is false"); ++ } ++ ++ ret = pthread_mutex_unlock(&priv->compact_lock); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to release lock for compaction"); ++ goto out; ++ } ++ ++ ret = gf_thread_create(&compact_thread, NULL, ctr_compact_thread, ++ (void *)this, "ctrcomp"); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to spawn compaction thread"); ++ goto out; ++ } ++ ++ goto out; ++ } /* default case */ ++ else { ++ goto out; ++ } ++ ++ ret = 0; ++ goto out; ++error: ++ GF_FREE(db_param_key); ++ GF_FREE(db_param); ++ GF_FREE(db_version); ++out: ++ return ret; ++} ++ ++/* IPC Call from tier migrator to clear the heat on the DB */ ++int32_t ++ctr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *in_dict) ++{ ++ int ret = -1; ++ gf_ctr_private_t *priv = NULL; ++ dict_t *out_dict = NULL; ++ ++ GF_ASSERT(this); ++ priv = this->private; ++ GF_ASSERT(priv); ++ GF_ASSERT(priv->_db_conn); ++ GF_VALIDATE_OR_GOTO(this->name, in_dict, wind); ++ ++ if (op != GF_IPC_TARGET_CTR) ++ goto wind; ++ ++ out_dict = dict_new(); ++ if (!out_dict) { ++ goto out; ++ } ++ ++ ret = ctr_ipc_helper(this, in_dict, out_dict); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed in ctr_ipc_helper"); ++ } ++out: ++ ++ STACK_UNWIND_STRICT(ipc, frame, ret, 0, out_dict); ++ ++ if (out_dict) ++ dict_unref(out_dict); ++ ++ return 0; ++ ++wind: ++ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ipc, op, in_dict); ++ ++ return 0; ++} ++ ++/* Call to initialize db for ctr xlator while ctr is enabled */ ++int32_t ++initialize_ctr_resource(xlator_t *this, gf_ctr_private_t *priv) ++{ ++ int ret_db = -1; ++ dict_t *params_dict = NULL; ++ ++ if (!priv) ++ goto error; ++ ++ /* For compaction */ ++ priv->compact_active = _gf_false; ++ priv->compact_mode_switched = _gf_false; ++ ret_db = pthread_mutex_init(&priv->compact_lock, NULL); ++ ++ if (ret_db) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed initializing compaction mutex"); ++ goto error; ++ } ++ ++ params_dict = dict_new(); ++ if (!params_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INIT_DB_PARAMS_FAILED, ++ "DB Params cannot initialized!"); ++ goto error; ++ } ++ ++ /*Extract db params options*/ ++ ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type); ++ if (ret_db) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED, ++ "Failed extracting db params 
options"); ++ goto error; ++ } ++ ++ /*Create a memory pool for ctr xlator*/ ++ this->local_pool = mem_pool_new(gf_ctr_local_t, 64); ++ if (!this->local_pool) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED, ++ "failed to create local memory pool"); ++ goto error; ++ } ++ ++ /*Initialize Database Connection*/ ++ priv->_db_conn = init_db(params_dict, priv->gfdb_db_type); ++ if (!priv->_db_conn) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed initializing data base"); ++ goto error; ++ } ++ ++ ret_db = 0; ++ goto out; ++ ++error: ++ if (this) ++ mem_pool_destroy(this->local_pool); ++ ++ if (priv) { ++ GF_FREE(priv->ctr_db_path); ++ } ++ GF_FREE(priv); ++ ret_db = -1; ++out: ++ if (params_dict) ++ dict_unref(params_dict); ++ ++ return ret_db; ++} ++ ++/******************************************************************************/ ++int ++reconfigure(xlator_t *this, dict_t *options) ++{ ++ char *temp_str = NULL; ++ int ret = 0; ++ gf_ctr_private_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (dict_get_str(options, "changetimerecorder.frequency", &temp_str)) { ++ gf_msg(this->name, GF_LOG_TRACE, 0, CTR_MSG_SET, "set"); ++ } ++ ++ GF_OPTION_RECONF("ctr-enabled", priv->enabled, options, bool, out); ++ if (!priv->enabled) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, ++ "CTR Xlator is not enabled so skip ctr reconfigure"); ++ goto out; ++ } ++ ++ /* If ctr is enabled after skip init for ctr xlator then call ++ initialize_ctr_resource during reconfigure phase to allocate resources ++ for xlator ++ */ ++ if (priv->enabled && !priv->_db_conn) { ++ ret = initialize_ctr_resource(this, priv); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed ctr initialize resource"); ++ goto out; ++ } ++ } ++ ++ GF_OPTION_RECONF("record-counters", priv->ctr_record_counter, options, bool, ++ out); ++ ++ GF_OPTION_RECONF("ctr-record-metadata-heat", priv->ctr_record_metadata_heat, ++ options, bool, out); ++ ++ GF_OPTION_RECONF("ctr_link_consistency", priv->ctr_link_consistency, ++ options, bool, out); ++ ++ GF_OPTION_RECONF("ctr_lookupheal_inode_timeout", ++ priv->ctr_lookupheal_inode_timeout, options, uint64, out); ++ ++ GF_OPTION_RECONF("ctr_lookupheal_link_timeout", ++ priv->ctr_lookupheal_link_timeout, options, uint64, out); ++ ++ GF_OPTION_RECONF("record-exit", priv->ctr_record_unwind, options, bool, ++ out); ++ ++ GF_OPTION_RECONF("record-entry", priv->ctr_record_wind, options, bool, out); ++ ++ /* If database is sqlite */ ++ if (priv->gfdb_db_type == GFDB_SQLITE3) { ++ /* AUTOCHECKPOINT */ ++ if (dict_get_str(options, GFDB_SQL_PARAM_WAL_AUTOCHECK, &temp_str) == ++ 0) { ++ ret = set_db_params(priv->_db_conn, "wal_autocheckpoint", temp_str); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, ++ "Failed to set %s", GFDB_SQL_PARAM_WAL_AUTOCHECK); ++ } ++ } ++ ++ /* CACHE_SIZE */ ++ if (dict_get_str(options, GFDB_SQL_PARAM_CACHE_SIZE, &temp_str) == 0) { ++ ret = set_db_params(priv->_db_conn, "cache_size", temp_str); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, ++ "Failed to set %s", GFDB_SQL_PARAM_CACHE_SIZE); ++ } ++ } ++ } ++ ++ ret = 0; ++ ++out: ++ ++ return ret; ++} ++ ++/****************************init********************************************/ ++ ++int32_t ++init(xlator_t *this) ++{ ++ gf_ctr_private_t *priv = NULL; ++ int ret_db = -1; ++ ++ if (!this) { ++ gf_msg("ctr", 
GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: ctr this is not initialized"); ++ return -1; ++ } ++ ++ if (!this->children || this->children->next) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: ctr should have exactly one child"); ++ return -1; ++ } ++ ++ if (!this->parents) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DANGLING_VOLUME, ++ "dangling volume. check volfile "); ++ } ++ ++ priv = GF_CALLOC(1, sizeof(*priv), gf_ctr_mt_private_t); ++ if (!priv) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED, ++ "Calloc did not work!!!"); ++ return -1; ++ } ++ ++ /*Default values for the translator*/ ++ priv->ctr_record_wind = _gf_true; ++ priv->ctr_record_unwind = _gf_false; ++ priv->ctr_hot_brick = _gf_false; ++ priv->gfdb_db_type = GFDB_SQLITE3; ++ priv->gfdb_sync_type = GFDB_DB_SYNC; ++ priv->_db_conn = NULL; ++ priv->ctr_lookupheal_link_timeout = CTR_DEFAULT_HARDLINK_EXP_PERIOD; ++ priv->ctr_lookupheal_inode_timeout = CTR_DEFAULT_INODE_EXP_PERIOD; ++ ++ /*Extract ctr xlator options*/ ++ ret_db = extract_ctr_options(this, priv); ++ if (ret_db) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED, ++ "Failed extracting ctr xlator options"); ++ GF_FREE(priv); ++ return -1; ++ } ++ ++ if (!priv->enabled) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, ++ "CTR Xlator is not enabled so skip ctr init"); ++ goto out; ++ } ++ ++ ret_db = initialize_ctr_resource(this, priv); ++ if (ret_db) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed ctr initialize resource"); ++ return -1; ++ } ++ ++out: ++ this->private = (void *)priv; ++ return 0; ++} ++ ++int ++notify(xlator_t *this, int event, void *data, ...) ++{ ++ gf_ctr_private_t *priv = NULL; ++ int ret = 0; ++ ++ priv = this->private; ++ ++ if (!priv) ++ goto out; ++ ++ ret = default_notify(this, event, data); ++ ++out: ++ return ret; ++} ++ ++int32_t ++mem_acct_init(xlator_t *this) ++{ ++ int ret = -1; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ ++ ret = xlator_mem_acct_init(this, gf_ctr_mt_end + 1); ++ ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_MEM_ACC_INIT_FAILED, ++ "Memory accounting init" ++ "failed"); ++ return ret; ++ } ++out: ++ return ret; ++} ++ ++void ++fini(xlator_t *this) ++{ ++ gf_ctr_private_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (priv && priv->enabled) { ++ if (fini_db(priv->_db_conn)) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED, ++ "Failed closing " ++ "db connection"); ++ } ++ ++ if (priv->_db_conn) ++ priv->_db_conn = NULL; ++ ++ GF_FREE(priv->ctr_db_path); ++ if (pthread_mutex_destroy(&priv->compact_lock)) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED, ++ "Failed to " ++ "destroy the compaction mutex"); ++ } ++ } ++ GF_FREE(priv); ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; ++ ++ return; ++} ++ ++struct xlator_fops fops = { ++ /*lookup*/ ++ .lookup = ctr_lookup, ++ /*write fops */ ++ .mknod = ctr_mknod, ++ .create = ctr_create, ++ .truncate = ctr_truncate, ++ .ftruncate = ctr_ftruncate, ++ .setxattr = ctr_setxattr, ++ .fsetxattr = ctr_fsetxattr, ++ .removexattr = ctr_removexattr, ++ .fremovexattr = ctr_fremovexattr, ++ .unlink = ctr_unlink, ++ .link = ctr_link, ++ .rename = ctr_rename, ++ .writev = ctr_writev, ++ .setattr = ctr_setattr, ++ .fsetattr = ctr_fsetattr, ++ /*read fops*/ ++ .readv = ctr_readv, ++ /* IPC call*/ ++ .ipc = ctr_ipc}; ++ ++struct xlator_cbks cbks = 
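++/* The fops table above makes CTR a recording pass-through: each
++ * intercepted fop stamps wind/unwind times and link information into
++ * frame->local, winds to the child, and inserts into the db on
++ * unwind. ctr_forget (below) drops the per-inode ctr context. */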
{.forget = ctr_forget}; ++ ++struct volume_options options[] = { ++ {.key = ++ { ++ "ctr-enabled", ++ }, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off", ++ .description = "Enables the CTR", ++ .flags = OPT_FLAG_SETTABLE}, ++ {.key = {"record-entry"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "on"}, ++ {.key = {"record-exit"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off"}, ++ {.key = {"record-counters"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off", ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .flags = OPT_FLAG_SETTABLE, ++ .tags = {}}, ++ {.key = {"ctr-record-metadata-heat"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {"ctr_link_consistency"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {"ctr_lookupheal_link_timeout"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "300", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_2}, ++ .tags = {}}, ++ {.key = {"ctr_lookupheal_inode_timeout"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "300", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_2}, ++ .tags = {}}, ++ {.key = {"hot-brick"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off"}, ++ {.key = {"db-type"}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"hashfile", "rocksdb", "changelog", "sqlite3", "hyperdex"}, ++ .default_value = "sqlite3", ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .flags = OPT_FLAG_SETTABLE, ++ .tags = {}}, ++ {.key = {"db-sync"}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"sync", "async"}, ++ .default_value = "sync"}, ++ {.key = {"db-path"}, .type = GF_OPTION_TYPE_PATH}, ++ {.key = {"db-name"}, .type = GF_OPTION_TYPE_STR}, ++ {.key = {GFDB_SQL_PARAM_SYNC}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"off", "normal", "full"}, ++ .default_value = "normal"}, ++ {.key = {GFDB_SQL_PARAM_JOURNAL_MODE}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"delete", "truncate", "persist", "memory", "wal", "off"}, ++ .default_value = "wal", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {GFDB_SQL_PARAM_AUTO_VACUUM}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"off", "full", "incr"}, ++ .default_value = "off", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {GFDB_SQL_PARAM_WAL_AUTOCHECK}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "25000", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {GFDB_SQL_PARAM_CACHE_SIZE}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "12500", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {GFDB_SQL_PARAM_PAGE_SIZE}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "4096", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {NULL}}, ++}; ++ ++xlator_api_t xlator_api = { ++ .init = init, ++ .fini = fini, ++ .notify = notify, ++ .reconfigure = reconfigure, ++ .mem_acct_init = mem_acct_init, ++ .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */ ++ .fops = &fops, ++ .cbks = &cbks, ++ 
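++ /* .op_version marks when this xlator and its options became
++ * available, letting glusterd reject settings while any peer
++ * still runs an older op-version. */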
.identifier = "changetimerecorder", ++ .category = GF_MAINTAINED, ++ .options = options, ++}; +diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h +new file mode 100644 +index 0000000..0150a1c +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/changetimerecorder.h +@@ -0,0 +1,21 @@ ++/* ++ Copyright (c) 2006-2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __CTR_H ++#define __CTR_H ++ ++#include ++#include ++#include ++#include ++#include "ctr_mem_types.h" ++#include "ctr-helper.h" ++ ++#endif /* __CTR_H */ +diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c +new file mode 100644 +index 0000000..e1e6573 +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr-helper.c +@@ -0,0 +1,293 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include "gfdb_sqlite3.h" ++#include "ctr-helper.h" ++#include "ctr-messages.h" ++ ++/******************************************************************************* ++ * ++ * Fill unwind into db record ++ * ++ ******************************************************************************/ ++int ++fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local, ++ gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path) ++{ ++ int ret = -1; ++ gfdb_time_t *ctr_uwtime = NULL; ++ gf_ctr_private_t *_priv = NULL; ++ ++ GF_ASSERT(this); ++ _priv = this->private; ++ GF_ASSERT(_priv); ++ ++ GF_ASSERT(ctr_local); ++ ++ /*If not unwind path error*/ ++ if (!isunwindpath(fop_path)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH, ++ "Wrong fop_path. 
Should be unwind"); ++ goto out; ++ } ++ ++ ctr_uwtime = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time; ++ CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path; ++ CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type; ++ ++ ret = gettimeofday(ctr_uwtime, NULL); ++ if (ret == -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, ++ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, ++ "Error " ++ "filling unwind time record %s", ++ strerror(errno)); ++ goto out; ++ } ++ ++ /* Special case i.e if its a tier rebalance ++ * + cold tier brick ++ * + its a create/mknod FOP ++ * we record unwind time as zero */ ++ if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG && ++ (!_priv->ctr_hot_brick) && isdentrycreatefop(fop_type)) { ++ memset(ctr_uwtime, 0, sizeof(*ctr_uwtime)); ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++/******************************************************************************* ++ * ++ * Fill wind into db record ++ * ++ ******************************************************************************/ ++int ++fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local, ++ gf_ctr_inode_context_t *ctr_inode_cx) ++{ ++ int ret = -1; ++ gfdb_time_t *ctr_wtime = NULL; ++ gf_ctr_private_t *_priv = NULL; ++ ++ GF_ASSERT(this); ++ _priv = this->private; ++ GF_ASSERT(_priv); ++ GF_ASSERT(ctr_local); ++ IS_CTR_INODE_CX_SANE(ctr_inode_cx); ++ ++ /*if not wind path error!*/ ++ if (!iswindpath(ctr_inode_cx->fop_path)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH, ++ "Wrong fop_path. Should be wind"); ++ goto out; ++ } ++ ++ ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time; ++ CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path; ++ CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type; ++ CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency; ++ ++ ret = gettimeofday(ctr_wtime, NULL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, ++ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, ++ "Error filling wind time record %s", strerror(errno)); ++ goto out; ++ } ++ ++ /* Special case i.e if its a tier rebalance ++ * + cold tier brick ++ * + its a create/mknod FOP ++ * we record wind time as zero */ ++ if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG && ++ (!_priv->ctr_hot_brick) && isdentrycreatefop(ctr_inode_cx->fop_type)) { ++ memset(ctr_wtime, 0, sizeof(*ctr_wtime)); ++ } ++ ++ /* Copy gfid into db record */ ++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid)); ++ ++ /* Copy older gfid if any */ ++ if (ctr_inode_cx->old_gfid && ++ (!gf_uuid_is_null(*(ctr_inode_cx->old_gfid)))) { ++ gf_uuid_copy(CTR_DB_REC(ctr_local).old_gfid, *(ctr_inode_cx->old_gfid)); ++ } ++ ++ /*Hard Links*/ ++ if (isdentryfop(ctr_inode_cx->fop_type)) { ++ /*new link fop*/ ++ if (NEW_LINK_CX(ctr_inode_cx)) { ++ gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid, ++ *((NEW_LINK_CX(ctr_inode_cx))->pargfid)); ++ strcpy(CTR_DB_REC(ctr_local).file_name, ++ NEW_LINK_CX(ctr_inode_cx)->basename); ++ } ++ /*rename fop*/ ++ if (OLD_LINK_CX(ctr_inode_cx)) { ++ gf_uuid_copy(CTR_DB_REC(ctr_local).old_pargfid, ++ *((OLD_LINK_CX(ctr_inode_cx))->pargfid)); ++ strcpy(CTR_DB_REC(ctr_local).old_file_name, ++ OLD_LINK_CX(ctr_inode_cx)->basename); ++ } ++ } ++ ++ ret = 0; ++out: ++ /*On error roll back and clean the record*/ ++ if (ret == -1) { ++ CLEAR_CTR_DB_RECORD(ctr_local); ++ } ++ return ret; ++} ++ ++/****************************************************************************** ++ * ++ * CTR xlator init related functions ++ * ++ * ++ * 
****************************************************************************/ ++static int ++extract_sql_params(xlator_t *this, dict_t *params_dict) ++{ ++ int ret = -1; ++ char *db_path = NULL; ++ char *db_name = NULL; ++ char *db_full_path = NULL; ++ ++ GF_ASSERT(this); ++ GF_ASSERT(params_dict); ++ ++ /*Extract the path of the db*/ ++ db_path = NULL; ++ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path", ++ db_path, "/var/run/gluster/"); ++ ++ /*Extract the name of the db*/ ++ db_name = NULL; ++ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name", ++ db_name, "gf_ctr_db.db"); ++ ++ /*Construct full path of the db*/ ++ ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name); ++ if (ret < 0) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, ++ CTR_MSG_CONSTRUCT_DB_PATH_FAILED, ++ "Construction of full db path failed!"); ++ goto out; ++ } ++ ++ /*Setting the SQL DB Path*/ ++ SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH, ++ db_full_path, ret, out); ++ ++ /*Extract rest of the sql params*/ ++ ret = gfdb_set_sql_params(this->name, this->options, params_dict); ++ if (ret) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, ++ "Failed setting values to sql param dict!"); ++ } ++ ++ ret = 0; ++ ++out: ++ if (ret) ++ GF_FREE(db_full_path); ++ return ret; ++} ++ ++int ++extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type) ++{ ++ int ret = -1; ++ ++ GF_ASSERT(this); ++ GF_ASSERT(params_dict); ++ ++ switch (db_type) { ++ case GFDB_SQLITE3: ++ ret = extract_sql_params(this, params_dict); ++ if (ret) ++ goto out; ++ break; ++ case GFDB_ROCKS_DB: ++ case GFDB_HYPERDEX: ++ case GFDB_HASH_FILE_STORE: ++ case GFDB_INVALID_DB: ++ case GFDB_DB_END: ++ goto out; ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++int ++extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv) ++{ ++ int ret = -1; ++ char *_val_str = NULL; ++ ++ GF_ASSERT(this); ++ GF_ASSERT(_priv); ++ ++ /*Checking if the CTR Translator is enabled. 
By default its disabled*/ ++ _priv->enabled = _gf_false; ++ GF_OPTION_INIT("ctr-enabled", _priv->enabled, bool, out); ++ if (!_priv->enabled) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, ++ "CTR Xlator is disabled."); ++ ret = 0; ++ goto out; ++ } ++ ++ /*Extract db type*/ ++ GF_OPTION_INIT("db-type", _val_str, str, out); ++ _priv->gfdb_db_type = gf_string2gfdbdbtype(_val_str); ++ ++ /*Extract flag for record on wind*/ ++ GF_OPTION_INIT("record-entry", _priv->ctr_record_wind, bool, out); ++ ++ /*Extract flag for record on unwind*/ ++ GF_OPTION_INIT("record-exit", _priv->ctr_record_unwind, bool, out); ++ ++ /*Extract flag for record on counters*/ ++ GF_OPTION_INIT("record-counters", _priv->ctr_record_counter, bool, out); ++ ++ /* Extract flag for record metadata heat */ ++ GF_OPTION_INIT("ctr-record-metadata-heat", _priv->ctr_record_metadata_heat, ++ bool, out); ++ ++ /*Extract flag for link consistency*/ ++ GF_OPTION_INIT("ctr_link_consistency", _priv->ctr_link_consistency, bool, ++ out); ++ ++ /*Extract ctr_lookupheal_inode_timeout */ ++ GF_OPTION_INIT("ctr_lookupheal_inode_timeout", ++ _priv->ctr_lookupheal_inode_timeout, uint64, out); ++ ++ /*Extract ctr_lookupheal_link_timeout*/ ++ GF_OPTION_INIT("ctr_lookupheal_link_timeout", ++ _priv->ctr_lookupheal_link_timeout, uint64, out); ++ ++ /*Extract flag for hot tier brick*/ ++ GF_OPTION_INIT("hot-brick", _priv->ctr_hot_brick, bool, out); ++ ++ /*Extract flag for sync mode*/ ++ GF_OPTION_INIT("db-sync", _val_str, str, out); ++ _priv->gfdb_sync_type = gf_string2gfdbdbsync(_val_str); ++ ++ ret = 0; ++ ++out: ++ return ret; ++} +diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h +new file mode 100644 +index 0000000..517fbb0 +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr-helper.h +@@ -0,0 +1,854 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __CTR_HELPER_H ++#define __CTR_HELPER_H ++ ++#include ++#include "ctr_mem_types.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "gfdb_data_store.h" ++#include "ctr-xlator-ctx.h" ++#include "ctr-messages.h" ++ ++#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */ ++#define CTR_DEFAULT_INODE_EXP_PERIOD 300 /* Five mins */ ++ ++typedef struct ctr_query_cbk_args { ++ int query_fd; ++ int count; ++} ctr_query_cbk_args_t; ++ ++/*CTR Xlator Private structure*/ ++typedef struct gf_ctr_private { ++ gf_boolean_t enabled; ++ char *ctr_db_path; ++ gf_boolean_t ctr_hot_brick; ++ gf_boolean_t ctr_record_wind; ++ gf_boolean_t ctr_record_unwind; ++ gf_boolean_t ctr_record_counter; ++ gf_boolean_t ctr_record_metadata_heat; ++ gf_boolean_t ctr_link_consistency; ++ gfdb_db_type_t gfdb_db_type; ++ gfdb_sync_type_t gfdb_sync_type; ++ gfdb_conn_node_t *_db_conn; ++ uint64_t ctr_lookupheal_link_timeout; ++ uint64_t ctr_lookupheal_inode_timeout; ++ gf_boolean_t compact_active; ++ gf_boolean_t compact_mode_switched; ++ pthread_mutex_t compact_lock; ++} gf_ctr_private_t; ++ ++/* ++ * gf_ctr_local_t is the ctr xlator local data structure that is stored in ++ * the call_frame of each FOP. 
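++ * A typical record path through this structure (sketch, simplified):
++ *   wind:   frame->local = init_ctr_local_t(this);
++ *           fill_db_record_for_wind(this, ctr_local, ctr_inode_cx);
++ *           insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
++ *   unwind: fill_db_record_for_unwind(this, ctr_local, fop_type, fop_path);
++ *           insert_record(...); ctr_free_frame_local(frame);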
++ *
++ * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is
++ * used by the insert_record() api from libgfdb. The gfdb_db_record object
++ * will contain all the inode and hardlink information (the latter only for
++ * dentry fops: create, mknod, link, unlink, rename). The ctr_local is kept
++ * alive till the unwind call and is released during the unwind. The same
++ * gfdb_db_record is used by the unwind insert_record() api, to record the
++ * unwind in the database.
++ *
++ * ia_inode_type in gf_ctr_local tells the type of the inode. This is
++ * important during the unwind path, as we will not have the inode there.
++ * We would have included this in the gfdb_db_record itself, but currently
++ * we record only file inode information.
++ *
++ * is_internal_fop in gf_ctr_local tells us if this is an internal fop, so
++ * that special/no action can be taken. We don't record change/access times
++ * or increment the heat counter for internal fops from the rebalancer.
++ * */
++typedef struct gf_ctr_local {
++    gfdb_db_record_t gfdb_db_record;
++    ia_type_t ia_inode_type;
++    gf_boolean_t is_internal_fop;
++    gf_special_pid_t client_pid;
++} gf_ctr_local_t;
++/*
++ * Easy access to the gfdb_db_record of ctr_local
++ * */
++#define CTR_DB_REC(ctr_local) (ctr_local->gfdb_db_record)
++
++/*Clear db record*/
++#define CLEAR_CTR_DB_RECORD(ctr_local)                                        \
++    do {                                                                      \
++        ctr_local->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID;           \
++        memset(&(ctr_local->gfdb_db_record.gfdb_wind_change_time), 0,         \
++               sizeof(gfdb_time_t));                                          \
++        memset(&(ctr_local->gfdb_db_record.gfdb_unwind_change_time), 0,       \
++               sizeof(gfdb_time_t));                                          \
++        gf_uuid_clear(ctr_local->gfdb_db_record.gfid);                        \
++        gf_uuid_clear(ctr_local->gfdb_db_record.pargfid);                     \
++        memset(ctr_local->gfdb_db_record.file_name, 0, GF_NAME_MAX + 1);      \
++        memset(ctr_local->gfdb_db_record.old_file_name, 0, GF_NAME_MAX + 1);  \
++        ctr_local->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP;        \
++        ctr_local->ia_inode_type = IA_INVAL;                                  \
++    } while (0)
++
++static gf_ctr_local_t *
++init_ctr_local_t(xlator_t *this)
++{
++    gf_ctr_local_t *ctr_local = NULL;
++
++    GF_ASSERT(this);
++
++    ctr_local = mem_get0(this->local_pool);
++    if (!ctr_local) {
++        gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
++               CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++               "Error while creating ctr local");
++        goto out;
++    }
++
++    CLEAR_CTR_DB_RECORD(ctr_local);
++out:
++    return ctr_local;
++}
++
++static void
++free_ctr_local(gf_ctr_local_t *ctr_local)
++{
++    if (ctr_local)
++        mem_put(ctr_local);
++}
++
++/******************************************************************************
++ *
++ *
++ * Context Carrier Structures
++ *
++ *
++ * ****************************************************************************/
++
++/*
++ * Context Carrier structures are used to carry relevant information about
++ * inodes and links from the fop calls to ctr_insert_wind.
++ * These structures just have pointers to the original data and do not
++ * do a deep copy of any data. This info is deep copied to
++ * ctr_local->gfdb_db_record and passed to the insert_record() api of libgfdb.
++ * This info remains persistent for the unwind in ctr_local->gfdb_db_record
++ * and once used will be destroyed.
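++ *
++ * A wind handler typically builds these on its stack (sketch; the variable
++ * names and the loc-based arguments are illustrative):
++ *   gf_ctr_link_context_t ctr_link_cx = {0};
++ *   gf_ctr_inode_context_t ctr_inode_cx = {0};
++ *   FILL_CTR_LINK_CX((&ctr_link_cx), loc->pargfid, loc->name, out);
++ *   FILL_CTR_INODE_CONTEXT((&ctr_inode_cx), loc->inode->ia_type,
++ *                          loc->inode->gfid, &ctr_link_cx, NULL,
++ *                          GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);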
++ * ++ * gf_ctr_link_context_t : Context structure for hard links ++ * gf_ctr_inode_context_t : Context structure for inodes ++ * ++ * */ ++ ++/*Context Carrier Structure for hard links*/ ++typedef struct gf_ctr_link_context { ++ uuid_t *pargfid; ++ const char *basename; ++} gf_ctr_link_context_t; ++ ++/*Context Carrier Structure for inodes*/ ++typedef struct gf_ctr_inode_context { ++ ia_type_t ia_type; ++ uuid_t *gfid; ++ uuid_t *old_gfid; ++ gf_ctr_link_context_t *new_link_cx; ++ gf_ctr_link_context_t *old_link_cx; ++ gfdb_fop_type_t fop_type; ++ gfdb_fop_path_t fop_path; ++ gf_boolean_t is_internal_fop; ++ /* Indicating metadata fops */ ++ gf_boolean_t is_metadata_fop; ++} gf_ctr_inode_context_t; ++ ++/*******************Util Macros for Context Carrier Structures*****************/ ++ ++/*Checks if ctr_link_cx is sane!*/ ++#define IS_CTR_LINK_CX_SANE(ctr_link_cx) \ ++ do { \ ++ if (ctr_link_cx) { \ ++ if (ctr_link_cx->pargfid) \ ++ GF_ASSERT(*(ctr_link_cx->pargfid)); \ ++ GF_ASSERT(ctr_link_cx->basename); \ ++ }; \ ++ } while (0) ++ ++/*Clear and fill the ctr_link_context with values*/ ++#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, label) \ ++ do { \ ++ GF_VALIDATE_OR_GOTO("ctr", ctr_link_cx, label); \ ++ GF_VALIDATE_OR_GOTO("ctr", _pargfid, label); \ ++ GF_VALIDATE_OR_GOTO("ctr", _basename, label); \ ++ memset(ctr_link_cx, 0, sizeof(*ctr_link_cx)); \ ++ ctr_link_cx->pargfid = &_pargfid; \ ++ ctr_link_cx->basename = _basename; \ ++ } while (0) ++ ++#define NEW_LINK_CX(ctr_inode_cx) ctr_inode_cx->new_link_cx ++ ++#define OLD_LINK_CX(ctr_inode_cx) ctr_inode_cx->old_link_cx ++ ++/*Checks if ctr_inode_cx is sane!*/ ++#define IS_CTR_INODE_CX_SANE(ctr_inode_cx) \ ++ do { \ ++ GF_ASSERT(ctr_inode_cx); \ ++ GF_ASSERT(ctr_inode_cx->gfid); \ ++ GF_ASSERT(*(ctr_inode_cx->gfid)); \ ++ GF_ASSERT(ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP); \ ++ GF_ASSERT(ctr_inode_cx->fop_path != GFDB_FOP_INVALID); \ ++ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \ ++ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \ ++ } while (0) ++ ++/*Clear and fill the ctr_inode_context with values*/ ++#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx, _ia_type, _gfid, _new_link_cx, \ ++ _old_link_cx, _fop_type, _fop_path) \ ++ do { \ ++ GF_ASSERT(ctr_inode_cx); \ ++ GF_ASSERT(_gfid); \ ++ GF_ASSERT(_fop_type != GFDB_FOP_INVALID_OP); \ ++ GF_ASSERT(_fop_path != GFDB_FOP_INVALID); \ ++ memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx)); \ ++ ctr_inode_cx->ia_type = _ia_type; \ ++ ctr_inode_cx->gfid = &_gfid; \ ++ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \ ++ if (_new_link_cx) \ ++ NEW_LINK_CX(ctr_inode_cx) = _new_link_cx; \ ++ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \ ++ if (_old_link_cx) \ ++ OLD_LINK_CX(ctr_inode_cx) = _old_link_cx; \ ++ ctr_inode_cx->fop_type = _fop_type; \ ++ ctr_inode_cx->fop_path = _fop_path; \ ++ } while (0) ++ ++/****************************************************************************** ++ * ++ * Util functions or macros used by ++ * insert wind and insert unwind ++ * ++ * ****************************************************************************/ ++/* Free ctr frame local */ ++static inline void ++ctr_free_frame_local(call_frame_t *frame) ++{ ++ if (frame) { ++ free_ctr_local((gf_ctr_local_t *)frame->local); ++ frame->local = NULL; ++ } ++} ++ ++/* Setting GF_REQUEST_LINK_COUNT_XDATA in dict ++ * that has to be sent to POSIX Xlator to send ++ * link count in unwind path. 
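++ * Typical usage in a wind handler (sketch): call this before STACK_WIND and,
++ * if it returns 1 (meaning the dict was created here), drop that reference
++ * with dict_unref() once the wind has been issued.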
++ * return 0 for success with not creation of dict ++ * return 1 for success with creation of dict ++ * return -1 for failure. ++ * */ ++static inline int ++set_posix_link_request(xlator_t *this, dict_t **xdata) ++{ ++ int ret = -1; ++ gf_boolean_t is_created = _gf_false; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, xdata, out); ++ ++ /*create xdata if NULL*/ ++ if (!*xdata) { ++ *xdata = dict_new(); ++ is_created = _gf_true; ++ ret = 1; ++ } else { ++ ret = 0; ++ } ++ ++ if (!*xdata) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL, ++ "xdata is NULL :Cannot send " ++ "GF_REQUEST_LINK_COUNT_XDATA to posix"); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = dict_set_int32(*xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ "Failed setting GF_REQUEST_LINK_COUNT_XDATA"); ++ ret = -1; ++ goto out; ++ } ++ ret = 0; ++out: ++ if (ret == -1) { ++ if (*xdata && is_created) { ++ dict_unref(*xdata); ++ } ++ } ++ return ret; ++} ++ ++/* ++ * If a bitrot fop ++ * */ ++#define BITROT_FOP(frame) \ ++ (frame->root->pid == GF_CLIENT_PID_BITD || \ ++ frame->root->pid == GF_CLIENT_PID_SCRUB) ++ ++/* ++ * If a rebalancer fop ++ * */ ++#define REBALANCE_FOP(frame) (frame->root->pid == GF_CLIENT_PID_DEFRAG) ++ ++/* ++ * If its a tiering rebalancer fop ++ * */ ++#define TIER_REBALANCE_FOP(frame) \ ++ (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG) ++ ++/* ++ * If its a AFR SELF HEAL ++ * */ ++#define AFR_SELF_HEAL_FOP(frame) (frame->root->pid == GF_CLIENT_PID_SELF_HEALD) ++ ++/* ++ * if a rebalancer fop goto ++ * */ ++#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label) \ ++ do { \ ++ if (REBALANCE_FOP(frame)) \ ++ goto label; \ ++ } while (0) ++ ++/* ++ * Internal fop ++ * ++ * */ ++static inline gf_boolean_t ++is_internal_fop(call_frame_t *frame, dict_t *xdata) ++{ ++ gf_boolean_t ret = _gf_false; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ ++ if (AFR_SELF_HEAL_FOP(frame)) { ++ ret = _gf_true; ++ } ++ if (BITROT_FOP(frame)) { ++ ret = _gf_true; ++ } ++ if (REBALANCE_FOP(frame) || TIER_REBALANCE_FOP(frame)) { ++ ret = _gf_true; ++ if (xdata && dict_get(xdata, CTR_ATTACH_TIER_LOOKUP)) { ++ ret = _gf_false; ++ } ++ } ++ if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { ++ ret = _gf_true; ++ } ++ ++ return ret; ++} ++ ++#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \ ++ do { \ ++ if (is_internal_fop(frame, dict)) \ ++ goto label; \ ++ } while (0) ++ ++/* if fop has failed exit */ ++#define CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, label) \ ++ do { \ ++ if (op_ret == -1) { \ ++ gf_msg_trace(this->name, 0, "Failed fop with %s", \ ++ strerror(op_errno)); \ ++ goto label; \ ++ }; \ ++ } while (0) ++ ++/* ++ * IS CTR Xlator is disabled then goto to label ++ * */ ++#define CTR_IS_DISABLED_THEN_GOTO(this, label) \ ++ do { \ ++ gf_ctr_private_t *_priv = NULL; \ ++ GF_ASSERT(this); \ ++ GF_ASSERT(this->private); \ ++ _priv = this->private; \ ++ if (!_priv->_db_conn) \ ++ goto label; \ ++ } while (0) ++ ++/* ++ * IS CTR record metadata heat is disabled then goto to label ++ * */ ++#define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label) \ ++ do { \ ++ gf_ctr_private_t *_priv = NULL; \ ++ GF_ASSERT(this); \ ++ GF_ASSERT(this->private); \ ++ _priv = this->private; \ ++ if (!_priv->ctr_record_metadata_heat) \ ++ goto label; \ ++ } while (0) ++ ++int ++fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local, ++ 
gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path); ++ ++int ++fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local, ++ gf_ctr_inode_context_t *ctr_inode_cx); ++ ++/******************************************************************************* ++ * CTR INSERT WIND ++ * ***************************************************************************** ++ * Function used to insert/update record into the database during a wind fop ++ * This function creates ctr_local structure into the frame of the fop ++ * call. ++ * ****************************************************************************/ ++ ++static inline int ++ctr_insert_wind(call_frame_t *frame, xlator_t *this, ++ gf_ctr_inode_context_t *ctr_inode_cx) ++{ ++ int ret = -1; ++ gf_ctr_private_t *_priv = NULL; ++ gf_ctr_local_t *ctr_local = NULL; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ GF_ASSERT(this); ++ IS_CTR_INODE_CX_SANE(ctr_inode_cx); ++ ++ _priv = this->private; ++ GF_ASSERT(_priv); ++ ++ GF_ASSERT(_priv->_db_conn); ++ ++ /*If record_wind option of CTR is on record wind for ++ * regular files only*/ ++ if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) { ++ frame->local = init_ctr_local_t(this); ++ if (!frame->local) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, ++ "WIND: Error while creating ctr local"); ++ goto out; ++ }; ++ ctr_local = frame->local; ++ ctr_local->client_pid = frame->root->pid; ++ ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop; ++ ++ /* Decide whether to record counters or not */ ++ CTR_DB_REC(ctr_local).do_record_counters = _gf_false; ++ /* If record counter is enabled */ ++ if (_priv->ctr_record_counter) { ++ /* If not a internal fop */ ++ if (!(ctr_local->is_internal_fop)) { ++ /* If its a metadata fop AND ++ * record metadata heat ++ * OR ++ * its NOT a metadata fop */ ++ if ((ctr_inode_cx->is_metadata_fop && ++ _priv->ctr_record_metadata_heat) || ++ (!ctr_inode_cx->is_metadata_fop)) { ++ CTR_DB_REC(ctr_local).do_record_counters = _gf_true; ++ } ++ } ++ } ++ ++ /* Decide whether to record times or not ++ * For non internal FOPS record times as usual*/ ++ CTR_DB_REC(ctr_local).do_record_times = _gf_false; ++ if (!ctr_local->is_internal_fop) { ++ /* If its a metadata fop AND ++ * record metadata heat ++ * OR ++ * its NOT a metadata fop */ ++ if ((ctr_inode_cx->is_metadata_fop && ++ _priv->ctr_record_metadata_heat) || ++ (!ctr_inode_cx->is_metadata_fop)) { ++ CTR_DB_REC(ctr_local).do_record_times = ++ (_priv->ctr_record_wind || _priv->ctr_record_unwind); ++ } ++ } ++ /* when its a internal FOPS*/ ++ else { ++ /* Record times only for create ++ * i.e when the inode is created */ ++ CTR_DB_REC(ctr_local).do_record_times = (isdentrycreatefop( ++ ctr_inode_cx->fop_type)) ++ ? 
_gf_true
++                                                         : _gf_false;
++        }
++
++        /*Fill the db record for insertion*/
++        ret = fill_db_record_for_wind(this, ctr_local, ctr_inode_cx);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
++                   "WIND: Error filling ctr local");
++            goto out;
++        }
++
++        /*Insert the db record*/
++        ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_INSERT_RECORD_WIND_FAILED,
++                   "WIND: Inserting of record failed!");
++            goto out;
++        }
++    }
++    ret = 0;
++out:
++
++    if (ret) {
++        free_ctr_local(ctr_local);
++        frame->local = NULL;
++    }
++
++    return ret;
++}
++
++/*******************************************************************************
++ * CTR INSERT UNWIND
++ * *****************************************************************************
++ * Function used to insert/update a record in the database during an unwind
++ * fop. This function destroys the ctr_local structure stored in the frame of
++ * the fop call at the end.
++ * ****************************************************************************/
++static inline int
++ctr_insert_unwind(call_frame_t *frame, xlator_t *this, gfdb_fop_type_t fop_type,
++                  gfdb_fop_path_t fop_path)
++{
++    int ret = -1;
++    gf_ctr_private_t *_priv = NULL;
++    gf_ctr_local_t *ctr_local = NULL;
++
++    GF_ASSERT(frame);
++    GF_ASSERT(this);
++
++    _priv = this->private;
++    GF_ASSERT(_priv);
++
++    GF_ASSERT(_priv->_db_conn);
++
++    ctr_local = frame->local;
++
++    if (ctr_local && (_priv->ctr_record_unwind || isdentryfop(fop_type)) &&
++        (ctr_local->ia_inode_type != IA_IFDIR)) {
++        CTR_DB_REC(ctr_local).do_record_uwind_time = _priv->ctr_record_unwind;
++
++        ret = fill_db_record_for_unwind(this, ctr_local, fop_type, fop_path);
++        if (ret == -1) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
++                   "UNWIND: Error filling ctr local");
++            goto out;
++        }
++
++        ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
++        if (ret == -1) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
++                   "UNWIND: Inserting of record failed!");
++            goto out;
++        }
++    }
++    ret = 0;
++out:
++    return ret;
++}
++
++/******************************************************************************
++ * Delete file/flink record/s from db
++ * ****************************************************************************/
++static inline int
++ctr_delete_hard_link_from_db(xlator_t *this, uuid_t gfid, uuid_t pargfid,
++                             char *basename, gfdb_fop_type_t fop_type,
++                             gfdb_fop_path_t fop_path)
++{
++    int ret = -1;
++    gfdb_db_record_t gfdb_db_record;
++    gf_ctr_private_t *_priv = NULL;
++
++    _priv = this->private;
++    GF_VALIDATE_OR_GOTO(this->name, _priv, out);
++    GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(gfid)), out);
++    GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(pargfid)), out);
++    GF_VALIDATE_OR_GOTO(this->name, (fop_type == GFDB_FOP_DENTRY_WRITE), out);
++    GF_VALIDATE_OR_GOTO(this->name,
++                        (fop_path == GFDB_FOP_UNDEL ||
++                         fop_path == GFDB_FOP_UNDEL_ALL),
++                        out);
++
++    /* Set gfdb_db_record to 0 */
++    memset(&gfdb_db_record, 0, sizeof(gfdb_db_record));
++
++    /* Copy basename */
++    if (snprintf(gfdb_db_record.file_name, GF_NAME_MAX, "%s", basename) >=
++        GF_NAME_MAX)
++        goto out;
++
++    /* Copy gfid into db record */
++    gf_uuid_copy(gfdb_db_record.gfid, gfid);
++
++    /* Copy pargfid into db record */
++    gf_uuid_copy(gfdb_db_record.pargfid, pargfid);
++
++    gfdb_db_record.gfdb_fop_path = fop_path;
++    gfdb_db_record.gfdb_fop_type = fop_type;
++
++    /*send delete request to db*/
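++    /* Note that the "delete" is itself expressed as an insert_record() call:
++     * libgfdb treats a record whose gfdb_fop_path is GFDB_FOP_UNDEL or
++     * GFDB_FOP_UNDEL_ALL as a request to remove that link (or all links of
++     * the GFID) from the database. */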
++    ret = insert_record(_priv->_db_conn, &gfdb_db_record);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RECORD_WIND_FAILED,
++               "Failed to delete record. %s", basename);
++        goto out;
++    }
++
++    ret = 0;
++out:
++    return ret;
++}
++
++/******************************* Hard link function ***************************/
++
++static inline gf_boolean_t
++__is_inode_expired(ctr_xlator_ctx_t *ctr_xlator_ctx, gf_ctr_private_t *_priv,
++                   gfdb_time_t *current_time)
++{
++    gf_boolean_t ret = _gf_false;
++    uint64_t time_diff = 0;
++
++    GF_ASSERT(ctr_xlator_ctx);
++    GF_ASSERT(_priv);
++    GF_ASSERT(current_time);
++
++    time_diff = current_time->tv_sec - ctr_xlator_ctx->inode_heal_period;
++
++    ret = (time_diff >= _priv->ctr_lookupheal_inode_timeout) ? _gf_true
++                                                             : _gf_false;
++    return ret;
++}
++
++static inline gf_boolean_t
++__is_hardlink_expired(ctr_hard_link_t *ctr_hard_link, gf_ctr_private_t *_priv,
++                      gfdb_time_t *current_time)
++{
++    gf_boolean_t ret = _gf_false;
++    uint64_t time_diff = 0;
++
++    GF_ASSERT(ctr_hard_link);
++    GF_ASSERT(_priv);
++    GF_ASSERT(current_time);
++
++    time_diff = current_time->tv_sec - ctr_hard_link->hardlink_heal_period;
++
++    ret = (time_diff >= _priv->ctr_lookupheal_link_timeout) ? _gf_true
++                                                            : _gf_false;
++
++    return ret;
++}
++
++/* Return values of heal*/
++typedef enum ctr_heal_ret_val {
++    CTR_CTX_ERROR = -1,
++    /* No healing required */
++    CTR_TRY_NO_HEAL = 0,
++    /* Try healing hard link */
++    CTR_TRY_HARDLINK_HEAL = 1,
++    /* Try healing inode */
++    CTR_TRY_INODE_HEAL = 2,
++} ctr_heal_ret_val_t;
++
++/**
++ * @brief Function to add a hard link to the inode context variable.
++ *        The inode context maintains an in-memory list, which is used
++ *        for smart healing of the database.
++ * @param frame of the FOP
++ * @param this is the xlator instance
++ * @param inode
++ * @return Return ctr_heal_ret_val_t
++ */
++
++static inline ctr_heal_ret_val_t
++add_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++    ctr_heal_ret_val_t ret_val = CTR_TRY_NO_HEAL;
++    int ret = -1;
++    gf_ctr_local_t *ctr_local = NULL;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++    gf_ctr_private_t *_priv = NULL;
++    gfdb_time_t current_time = {0};
++
++    GF_ASSERT(frame);
++    GF_ASSERT(this);
++    GF_ASSERT(inode);
++    GF_ASSERT(this->private);
++
++    _priv = this->private;
++
++    ctr_local = frame->local;
++    if (!ctr_local) {
++        goto out;
++    }
++
++    ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
++    if (!ctr_xlator_ctx) {
++        gf_msg(this->name, GF_LOG_ERROR, 0,
++               CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
++               "Failed accessing ctr inode context");
++        goto out;
++    }
++
++    LOCK(&ctr_xlator_ctx->lock);
++
++    /* Check if the hard link already exists
++     * in the ctr inode context*/
++    ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx,
++                                             CTR_DB_REC(ctr_local).pargfid,
++                                             CTR_DB_REC(ctr_local).file_name);
++    /* if there then ignore */
++    if (ctr_hard_link) {
++        ret = gettimeofday(&current_time, NULL);
++        if (ret == -1) {
++            gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++            ret_val = CTR_CTX_ERROR;
++            goto unlock;
++        }
++
++        if (__is_hardlink_expired(ctr_hard_link, _priv, &current_time)) {
++            ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++            ret_val = ret_val | CTR_TRY_HARDLINK_HEAL;
++        }
++
++        if (__is_inode_expired(ctr_xlator_ctx, _priv, &current_time)) {
++            ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
++            ret_val = ret_val | CTR_TRY_INODE_HEAL;
++        }
++
++        goto unlock;
++    }
++
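++    /* Hard link not found in the inode context: fall through, append it to
++     * the in-memory list and stamp it with the current time so later lookups
++     * can decide when a db heal is due. */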
/* Add the hard link to the list*/ ++ ret = ctr_add_hard_link(this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid, ++ CTR_DB_REC(ctr_local).file_name); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, ++ "Failed to add hardlink to the ctr inode context"); ++ ret_val = CTR_CTX_ERROR; ++ goto unlock; ++ } ++ ++ ret_val = CTR_TRY_NO_HEAL; ++unlock: ++ UNLOCK(&ctr_xlator_ctx->lock); ++out: ++ return ret_val; ++} ++ ++static inline int ++delete_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode) ++{ ++ int ret = -1; ++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; ++ gf_ctr_local_t *ctr_local = NULL; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(this); ++ GF_ASSERT(inode); ++ ++ ctr_local = frame->local; ++ if (!ctr_local) { ++ goto out; ++ } ++ ++ ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode); ++ if (!ctr_xlator_ctx) { ++ /* Since there is no ctr inode context so nothing more to do */ ++ ret = 0; ++ goto out; ++ } ++ ++ ret = ctr_delete_hard_link(this, ctr_xlator_ctx, ++ CTR_DB_REC(ctr_local).pargfid, ++ CTR_DB_REC(ctr_local).file_name); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED, ++ "Failed to delete hard link"); ++ goto out; ++ } ++ ++ ret = 0; ++ ++out: ++ return ret; ++} ++ ++static inline int ++update_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode) ++{ ++ int ret = -1; ++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; ++ gf_ctr_local_t *ctr_local = NULL; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(this); ++ GF_ASSERT(inode); ++ ++ ctr_local = frame->local; ++ if (!ctr_local) { ++ goto out; ++ } ++ ++ ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode); ++ if (!ctr_xlator_ctx) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, ++ "Failed accessing ctr inode context"); ++ goto out; ++ } ++ ++ ret = ctr_update_hard_link( ++ this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid, ++ CTR_DB_REC(ctr_local).file_name, CTR_DB_REC(ctr_local).old_pargfid, ++ CTR_DB_REC(ctr_local).old_file_name); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED, ++ "Failed to delete hard link"); ++ goto out; ++ } ++ ++ ret = 0; ++ ++out: ++ return ret; ++} ++ ++/****************************************************************************** ++ * ++ * CTR xlator init related functions ++ * ++ * ++ * ****************************************************************************/ ++int ++extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type); ++ ++int ++extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv); ++ ++#endif +diff --git a/xlators/features/changetimerecorder/src/ctr-messages.h b/xlators/features/changetimerecorder/src/ctr-messages.h +new file mode 100644 +index 0000000..23adf0a +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr-messages.h +@@ -0,0 +1,61 @@ ++/* ++ Copyright (c) 2013 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++ */ ++ ++#ifndef _CTR_MESSAGES_H_ ++#define _CTR_MESSAGES_H_ ++ ++#include ++ ++/* To add new message IDs, append new identifiers at the end of the list. ++ * ++ * Never remove a message ID. If it's not used anymore, you can rename it or ++ * leave it as it is, but not delete it. 
This is to prevent reutilization of ++ * IDs by other messages. ++ * ++ * The component name must match one of the entries defined in ++ * glfs-message-id.h. ++ */ ++ ++GLFS_MSGID( ++ CTR, CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, ++ CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND, ++ CTR_MSG_INSERT_LINK_WIND_FAILED, CTR_MSG_INSERT_WRITEV_WIND_FAILED, ++ CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, ++ CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED, ++ CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED, ++ CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED, ++ CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED, ++ CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED, ++ CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED, ++ CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, CTR_MSG_INSERT_RENAME_WIND_FAILED, ++ CTR_MSG_INSERT_RENAME_UNWIND_FAILED, ++ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, CTR_MSG_ADD_HARDLINK_FAILED, ++ CTR_MSG_DELETE_HARDLINK_FAILED, CTR_MSG_UPDATE_HARDLINK_FAILED, ++ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, CTR_MSG_INSERT_UNLINK_WIND_FAILED, ++ CTR_MSG_XDATA_NULL, CTR_MSG_INSERT_FSYNC_WIND_FAILED, ++ CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED, ++ CTR_MSG_INSERT_MKNOD_WIND_FAILED, CTR_MSG_INSERT_CREATE_WIND_FAILED, ++ CTR_MSG_INSERT_CREATE_UNWIND_FAILED, CTR_MSG_INSERT_RECORD_WIND_FAILED, ++ CTR_MSG_INSERT_READV_WIND_FAILED, CTR_MSG_GET_GFID_FROM_DICT_FAILED, ++ CTR_MSG_SET, CTR_MSG_FATAL_ERROR, CTR_MSG_DANGLING_VOLUME, ++ CTR_MSG_CALLOC_FAILED, CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED, ++ CTR_MSG_INIT_DB_PARAMS_FAILED, CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED, ++ CTR_MSG_MEM_ACC_INIT_FAILED, CTR_MSG_CLOSE_DB_CONN_FAILED, ++ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, CTR_MSG_WRONG_FOP_PATH, ++ CTR_MSG_CONSTRUCT_DB_PATH_FAILED, CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, ++ CTR_MSG_XLATOR_DISABLED, CTR_MSG_HARDLINK_MISSING_IN_LIST, ++ CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, CTR_MSG_INIT_LOCK_FAILED, ++ CTR_MSG_COPY_FAILED, CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED, ++ CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, CTR_MSG_NULL_LOCAL); ++ ++#endif /* !_CTR_MESSAGES_H_ */ +diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c +new file mode 100644 +index 0000000..b6b66d5 +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c +@@ -0,0 +1,362 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/
++
++#include "ctr-xlator-ctx.h"
++#include "ctr-messages.h"
++#include <string.h>
++#include <sys/time.h>
++
++#define IS_THE_ONLY_HARDLINK(ctr_hard_link)                                    \
++    (ctr_hard_link->list.next == ctr_hard_link->list.prev)
++
++static void
++fini_ctr_hard_link(ctr_hard_link_t **ctr_hard_link)
++{
++    GF_ASSERT(ctr_hard_link);
++
++    if (!*ctr_hard_link)
++        return;
++    GF_FREE((*ctr_hard_link)->base_name);
++    GF_FREE(*ctr_hard_link);
++    *ctr_hard_link = NULL;
++}
++
++/* Please lock the ctr_xlator_ctx before using this function */
++ctr_hard_link_t *
++ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++                         uuid_t pgfid, const char *base_name)
++{
++    ctr_hard_link_t *_hard_link = NULL;
++    ctr_hard_link_t *searched_hardlink = NULL;
++
++    GF_ASSERT(this);
++    GF_ASSERT(ctr_xlator_ctx);
++
++    if (pgfid == NULL || base_name == NULL)
++        goto out;
++
++    /*linear search*/
++    list_for_each_entry(_hard_link, &ctr_xlator_ctx->hardlink_list, list)
++    {
++        if (gf_uuid_compare(_hard_link->pgfid, pgfid) == 0 &&
++            _hard_link->base_name &&
++            strcmp(_hard_link->base_name, base_name) == 0) {
++            searched_hardlink = _hard_link;
++            break;
++        }
++    }
++
++out:
++    return searched_hardlink;
++}
++
++/* Please lock the ctr_xlator_ctx before using this function */
++int
++ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++                  uuid_t pgfid, const char *base_name)
++{
++    int ret = -1;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++    struct timeval current_time = {0};
++
++    GF_ASSERT(this);
++    GF_ASSERT(ctr_xlator_ctx);
++
++    if (pgfid == NULL || base_name == NULL)
++        goto out;
++
++    ctr_hard_link = GF_CALLOC(1, sizeof(*ctr_hard_link), gf_ctr_mt_hard_link_t);
++    if (!ctr_hard_link) {
++        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
++               "Failed allocating ctr_hard_link");
++        goto out;
++    }
++
++    /*Initialize the ctr_hard_link object and
++     * assign the values : parent GFID and basename*/
++    INIT_LIST_HEAD(&ctr_hard_link->list);
++    gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
++    ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
++               "Failed copying basename to ctr_hard_link");
++        goto error;
++    }
++
++    ret = gettimeofday(&current_time, NULL);
++    if (ret == -1) {
++        gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++        goto error;
++    }
++
++    /*Add the hard link to the list*/
++    list_add_tail(&ctr_hard_link->list, &ctr_xlator_ctx->hardlink_list);
++
++    ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++
++    /*aal izz well!*/
++    ret = 0;
++    goto out;
++error:
++    GF_FREE(ctr_hard_link);
++out:
++    return ret;
++}
++
++static void
++__delete_hard_link_from_list(ctr_hard_link_t **ctr_hard_link)
++{
++    GF_ASSERT(ctr_hard_link);
++    GF_ASSERT(*ctr_hard_link);
++
++    /*Remove hard link from list*/
++    list_del(&(*ctr_hard_link)->list);
++    fini_ctr_hard_link(ctr_hard_link);
++}
++
++int
++ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++                     uuid_t pgfid, const char *base_name)
++{
++    int ret = -1;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++
++    GF_ASSERT(this);
++    GF_ASSERT(ctr_xlator_ctx);
++
++    LOCK(&ctr_xlator_ctx->lock);
++
++    /*Check if the hard link is present */
++    ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, pgfid,
++                                             base_name);
++    if (!ctr_hard_link) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_HARDLINK_MISSING_IN_LIST,
++               "Hard link doesn't exist in the list");
++        goto out;
++    }
++
++    __delete_hard_link_from_list(&ctr_hard_link);
++    ctr_hard_link = NULL;
++
++    ret = 0;
++out:
++    UNLOCK(&ctr_xlator_ctx->lock);
++
++    return ret;
++}
++
++int
++ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++                     uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
++                     const char *old_base_name)
++{
++    int ret = -1;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++    struct timeval current_time = {0};
++
++    GF_ASSERT(this);
++    GF_ASSERT(ctr_xlator_ctx);
++
++    LOCK(&ctr_xlator_ctx->lock);
++
++    /*Check if the hard link is present */
++    ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, old_pgfid,
++                                             old_base_name);
++    if (!ctr_hard_link) {
++        gf_msg_trace(this->name, 0, "Hard link doesn't exist in the list");
++        /* Since the hard link is not present in the list
++         * we add it to the list */
++        ret = ctr_add_hard_link(this, ctr_xlator_ctx, pgfid, base_name);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED,
++                   "Failed adding hard link to the list");
++            goto out;
++        }
++        ret = 0;
++        goto out;
++    }
++
++    /* update the hard link */
++    gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
++    GF_FREE(ctr_hard_link->base_name);
++    ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
++               "Failed copying basename to ctr_hard_link");
++        /* delete the corrupted entry */
++        __delete_hard_link_from_list(&ctr_hard_link);
++        ctr_hard_link = NULL;
++        goto out;
++    }
++
++    ret = gettimeofday(&current_time, NULL);
++    if (ret == -1) {
++        gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++        ctr_hard_link->hardlink_heal_period = 0;
++    } else {
++        ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++    }
++
++    ret = 0;
++
++out:
++    UNLOCK(&ctr_xlator_ctx->lock);
++
++    return ret;
++}
++
++/* Delete all hardlinks */
++static int
++ctr_delete_all_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx)
++{
++    int ret = -1;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++    ctr_hard_link_t *tmp = NULL;
++
++    GF_ASSERT(ctr_xlator_ctx);
++
++    LOCK(&ctr_xlator_ctx->lock);
++
++    list_for_each_entry_safe(ctr_hard_link, tmp, &ctr_xlator_ctx->hardlink_list,
++                             list)
++    {
++        /*Remove hard link from list*/
++        __delete_hard_link_from_list(&ctr_hard_link);
++        ctr_hard_link = NULL;
++    }
++
++    UNLOCK(&ctr_xlator_ctx->lock);
++
++    ret = 0;
++
++    return ret;
++}
++
++/* Please lock the inode before using this function */
++static ctr_xlator_ctx_t *
++__get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++    int ret = 0;
++    uint64_t _addr = 0;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++
++    GF_ASSERT(this);
++    GF_ASSERT(inode);
++
++    ret = __inode_ctx_get(inode, this, &_addr);
++    if (ret < 0)
++        _addr = 0;
++    if (_addr != 0) {
++        ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
++    }
++
++    return ctr_xlator_ctx;
++}
++
++ctr_xlator_ctx_t *
++init_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++    int ret = -1;
++    uint64_t _addr = 0;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++    struct timeval current_time = {0};
++
++    GF_ASSERT(this);
++    GF_ASSERT(inode);
++
++    LOCK(&inode->lock);
++    {
++        ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
++        if (ctr_xlator_ctx) {
++            ret = 0;
++            goto out;
++        }
++        ctr_xlator_ctx = GF_CALLOC(1, sizeof(*ctr_xlator_ctx),
++                                   gf_ctr_mt_xlator_ctx);
++        if (!ctr_xlator_ctx)
++            goto out;
++
++        ret = LOCK_INIT(&ctr_xlator_ctx->lock);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, ret, CTR_MSG_INIT_LOCK_FAILED,
++                   "Failed init lock %s", strerror(ret));
++            goto out;
++        }
++        _addr = (uint64_t)(uintptr_t)ctr_xlator_ctx;
++
++        ret = __inode_ctx_set(inode, this, &_addr);
++        if (ret) {
++            goto out;
++        }
++
++        INIT_LIST_HEAD(&ctr_xlator_ctx->hardlink_list);
++
++        ret = gettimeofday(&current_time, NULL);
++        if (ret == -1) {
++            gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++            goto out;
++        }
++
++        ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
++    }
++    ret = 0;
++out:
++    if (ret) {
++        GF_FREE(ctr_xlator_ctx);
++        ctr_xlator_ctx = NULL;
++    }
++
++    UNLOCK(&inode->lock);
++
++    return ctr_xlator_ctx;
++}
++
++void
++fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++    int ret = 0;
++    uint64_t _addr = 0;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++
++    inode_ctx_del(inode, this, &_addr);
++    if (!_addr)
++        return;
++
++    ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
++
++    ret = ctr_delete_all_hard_link(this, ctr_xlator_ctx);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
++               "Failed deleting all hard links from inode context");
++    }
++
++    LOCK_DESTROY(&ctr_xlator_ctx->lock);
++
++    GF_FREE(ctr_xlator_ctx);
++}
++
++ctr_xlator_ctx_t *
++get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++
++    LOCK(&inode->lock);
++    ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
++    UNLOCK(&inode->lock);
++
++    return ctr_xlator_ctx;
++}
+diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
+new file mode 100644
+index 0000000..4e3bf7e
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
+@@ -0,0 +1,68 @@
++/*
++   Copyright (c) 2015 Red Hat, Inc.
++   This file is part of GlusterFS.
++
++   This file is licensed to you under your choice of the GNU Lesser
++   General Public License, version 3 or any later version (LGPLv3 or
++   later), or the GNU General Public License, version 2 (GPLv2), in all
++   cases as published by the Free Software Foundation.
++*/
++
++#ifndef __CTR_XLATOR_CTX_H
++#define __CTR_XLATOR_CTX_H
++
++#include
++#include "ctr_mem_types.h"
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++typedef struct ctr_hard_link {
++    uuid_t pgfid;
++    char *base_name;
++    /* Hardlink expiry : Defines the expiry period after which a
++     * database heal is attempted.
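++     * The period is refreshed whenever the link is added or updated (see
++     * ctr_add_hard_link()/ctr_update_hard_link() in ctr-xlator-ctx.c) and is
++     * compared against ctr_lookupheal_link_timeout in __is_hardlink_expired().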
*/ ++ uint64_t hardlink_heal_period; ++ struct list_head list; ++} ctr_hard_link_t; ++ ++typedef struct ctr_xlator_ctx { ++ /* This represents the looked up hardlinks ++ * NOTE: This doesn't represent all physical hardlinks of the inode*/ ++ struct list_head hardlink_list; ++ uint64_t inode_heal_period; ++ gf_lock_t lock; ++} ctr_xlator_ctx_t; ++ ++ctr_hard_link_t * ++ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, ++ uuid_t pgfid, const char *base_name); ++ ++int ++ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, ++ uuid_t pgfid, const char *base_name); ++ ++int ++ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, ++ uuid_t pgfid, const char *base_name); ++ ++int ++ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, ++ uuid_t pgfid, const char *base_name, uuid_t old_pgfid, ++ const char *old_base_name); ++ ++ctr_xlator_ctx_t * ++get_ctr_xlator_ctx(xlator_t *this, inode_t *inode); ++ ++ctr_xlator_ctx_t * ++init_ctr_xlator_ctx(xlator_t *this, inode_t *inode); ++ ++void ++fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode); ++ ++#endif +diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/changetimerecorder/src/ctr_mem_types.h +new file mode 100644 +index 0000000..7b8f531 +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr_mem_types.h +@@ -0,0 +1,22 @@ ++/* ++ Copyright (c) 2008-2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __CTR_MEM_TYPES_H__ ++#define __CTR_MEM_TYPES_H__ ++ ++#include "gfdb_mem-types.h" ++ ++enum gf_ctr_mem_types_ { ++ gf_ctr_mt_private_t = gfdb_mt_end + 1, ++ gf_ctr_mt_xlator_ctx, ++ gf_ctr_mt_hard_link_t, ++ gf_ctr_mt_end ++}; ++#endif +-- +1.8.3.1 + diff --git a/SOURCES/0086-Revert-tiering-remove-the-translator-from-build-and-.patch b/SOURCES/0086-Revert-tiering-remove-the-translator-from-build-and-.patch new file mode 100644 index 0000000..b612ddf --- /dev/null +++ b/SOURCES/0086-Revert-tiering-remove-the-translator-from-build-and-.patch @@ -0,0 +1,3194 @@ +From 06adac5dbac7b2067232270cbee12931400f7824 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham +Date: Sat, 6 Apr 2019 17:00:47 +0530 +Subject: [PATCH 086/124] Revert "tiering: remove the translator from build and + glusterd" + +This reverts commit 55a6ba56bea9ec0d3316c005300c514ea3ab0e54. +Add the test files and glusterd related changes. 
+ +Label: DOWNSTREAM ONLY + +Change-Id: Ib704b7142a82cb1e94538a48af916730992a5701 +Signed-off-by: Hari Gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/166246 +Reviewed-by: Sanju Rakonde +Reviewed-by: Nithya Balachandran +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + MAINTAINERS | 18 + + tests/basic/afr/granular-esh/cli.t | 30 +- + ...1214222-directories_missing_after_attach_tier.t | 61 ++ + ...60185-donot-allow-detach-commit-unnecessarily.t | 47 ++ + tests/basic/tier/ctr-rename-overwrite.t | 50 ++ + tests/basic/tier/file_lock.c | 72 ++ + tests/basic/tier/file_with_spaces.t | 71 ++ + tests/basic/tier/fops-during-migration-pause.t | 89 +++ + tests/basic/tier/fops-during-migration.t | 105 +++ + tests/basic/tier/frequency-counters.t | 82 +++ + tests/basic/tier/legacy-many.t | 92 +++ + tests/basic/tier/locked_file_migration.t | 80 +++ + tests/basic/tier/new-tier-cmds.t | 129 ++++ + tests/basic/tier/readdir-during-migration.t | 65 ++ + tests/basic/tier/record-metadata-heat.t | 106 +++ + tests/basic/tier/tier-heald.t | 98 +++ + tests/basic/tier/tier-snapshot.t | 47 ++ + tests/basic/tier/tier.t | 219 +++++++ + tests/basic/tier/tier_lookup_heal.t | 69 ++ + tests/basic/tier/tierd_check.t | 128 ++++ + tests/basic/tier/unlink-during-migration.t | 92 +++ + ...03028-Rebalance-glusterd-rpc-connection-issue.t | 78 +++ + tests/bugs/quota/bug-1288474.t | 51 ++ + .../bug-1290965-detect-bitrotten-objects.t | 53 ++ + .../tier/bug-1205545-CTR-and-trash-integration.t | 72 ++ + tests/bugs/tier/bug-1279376-rename-demoted-file.t | 93 +++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 75 +++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 723 +++++++++++++++++++++ + 28 files changed, 2894 insertions(+), 1 deletion(-) + create mode 100755 tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t + create mode 100644 tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t + create mode 100755 tests/basic/tier/ctr-rename-overwrite.t + create mode 100644 tests/basic/tier/file_lock.c + create mode 100755 tests/basic/tier/file_with_spaces.t + create mode 100755 tests/basic/tier/fops-during-migration-pause.t + create mode 100755 tests/basic/tier/fops-during-migration.t + create mode 100644 tests/basic/tier/frequency-counters.t + create mode 100644 tests/basic/tier/legacy-many.t + create mode 100755 tests/basic/tier/locked_file_migration.t + create mode 100644 tests/basic/tier/new-tier-cmds.t + create mode 100644 tests/basic/tier/readdir-during-migration.t + create mode 100755 tests/basic/tier/record-metadata-heat.t + create mode 100644 tests/basic/tier/tier-heald.t + create mode 100644 tests/basic/tier/tier-snapshot.t + create mode 100755 tests/basic/tier/tier.t + create mode 100755 tests/basic/tier/tier_lookup_heal.t + create mode 100644 tests/basic/tier/tierd_check.t + create mode 100755 tests/basic/tier/unlink-during-migration.t + create mode 100644 tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t + create mode 100755 tests/bugs/quota/bug-1288474.t + create mode 100644 tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t + create mode 100644 tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t + create mode 100755 tests/bugs/tier/bug-1279376-rename-demoted-file.t + +diff --git a/MAINTAINERS b/MAINTAINERS +index b1fc0ee..1f4c93a 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -103,6 +103,12 @@ P: Kotresh HR + S: Maintained + F: xlators/features/changelog/ + ++Changetimerecorder ++M: Shyamsundar 
Ranganathan ++P: Hari Gowtham ++S: Maintained ++F: xlators/features/changetimerecorder/ ++ + Decompounder + M: Krutika Dhananjay + P: Pranith Karampuri +@@ -248,6 +254,12 @@ P: Xavier Hernandez + S: Maintained + F: xlators/features/shard/ + ++Tiering ++M: Shyamsundar Ranganathan ++P: Hari Gowtham ++S: Maintained ++F: xlators/cluster/dht/src/tier.c ++ + Trash + M: Anoop C S + M: Jiffin Tony Thottan +@@ -327,6 +339,12 @@ P: Soumya Koduri + S: Maintained + F: api/ + ++libgfdb ++M: Shyamsundar Ranganathan ++P: Hari Gowtham ++S: Maintained ++F: libglusterfs/src/gfdb/ ++ + libglusterfs + M: Amar Tumballi + M: Jeff Darcy +diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t +index 10b6c63..995d93e 100644 +--- a/tests/basic/afr/granular-esh/cli.t ++++ b/tests/basic/afr/granular-esh/cli.t +@@ -11,7 +11,7 @@ TESTS_EXPECTED_IN_LOOP=4 + TEST glusterd + TEST pidof glusterd + +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} + # Test that enabling the option should work on a newly created volume + TEST $CLI volume set $V0 cluster.granular-entry-heal on + TEST $CLI volume set $V0 cluster.granular-entry-heal off +@@ -25,6 +25,34 @@ TEST $CLI volume start $V1 + TEST ! $CLI volume heal $V1 granular-entry-heal enable + TEST ! $CLI volume heal $V1 granular-entry-heal disable + ++####################### ++###### TIER TEST ###### ++####################### ++# Execute the same command on a disperse + replicate tiered volume and make ++# sure the option is set on the replicate leg of the volume ++TEST $CLI volume tier $V1 attach replica 2 $H0:$B0/${V1}{3,4} ++TEST $CLI volume heal $V1 granular-entry-heal enable ++EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume heal $V1 granular-entry-heal disable ++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++ ++# Kill a disperse brick and make heal be pending on the volume. ++TEST kill_brick $V1 $H0 $B0/${V1}0 ++ ++# Now make sure that one offline brick in disperse does not affect enabling the ++# option on the volume. ++TEST $CLI volume heal $V1 granular-entry-heal enable ++EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume heal $V1 granular-entry-heal disable ++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++ ++# Now kill a replicate brick. ++TEST kill_brick $V1 $H0 $B0/${V1}3 ++# Now make sure that one offline brick in replicate causes the command to be ++# failed. ++TEST ! $CLI volume heal $V1 granular-entry-heal enable ++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++ + ###################### + ### REPLICATE TEST ### + ###################### +diff --git a/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t b/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t +new file mode 100755 +index 0000000..f9166d7 +--- /dev/null ++++ b/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t +@@ -0,0 +1,61 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../tier.rc ++ ++LAST_BRICK=3 ++CACHE_BRICK_FIRST=4 ++CACHE_BRICK_LAST=5 ++DEMOTE_TIMEOUT=12 ++PROMOTE_TIMEOUT=5 ++ ++ ++LAST_BRICK=1 ++CACHE_BRICK=2 ++DEMOTE_TIMEOUT=12 ++PROMOTE_TIMEOUT=5 ++MIGRATION_TIMEOUT=10 ++cleanup ++ ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Basic operations. ++cd $M0 ++TEST stat . ++TEST mkdir d1 ++TEST [ -d d1 ] ++TEST touch file1 ++TEST [ -e file1 ] ++ ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++TEST $CLI volume set $V0 features.ctr-enabled on ++ ++#check whether the directory's and files are present on mount or not. ++TEST [ -d d1 ] ++TEST [ -e file1 ] ++ ++cd ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0; ++ ++tier_status () ++{ ++ $CLI volume tier $V0 detach status | grep progress | wc -l ++} ++ ++TEST $CLI volume tier $V0 detach start ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_status ++TEST $CLI volume tier $V0 detach commit ++ ++EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK_FIRST} ++ ++EXPECT_WITHIN $REBALANCE_TIMEOUT "0" confirm_vol_stopped $V0 ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t b/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t +new file mode 100644 +index 0000000..6efbe32 +--- /dev/null ++++ b/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t +@@ -0,0 +1,47 @@ ++#!/bin/bash ++ ++## Test case for BZ: 1260185 ++## Do not allow detach-tier commit without "force" option or without ++## user have not started "detach-tier start" operation ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../cluster.rc ++. $(dirname $0)/../../tier.rc ++ ++cleanup; ++ ++## Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++ ++## Lets create and start the volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2} ++TEST $CLI volume start $V0 ++ ++## Perform attach-tier operation on volume $V0 ++TEST $CLI volume tier $V0 attach $H0:$B0/${V0}{3..4} ++ ++## detach-tier commit operation without force option on volume $V0 ++## should not succeed ++TEST ! $CLI --mode=script volume tier $V0 detach commit ++ ++## detach-tier commit operation with force option on volume $V0 ++## should succeed ++TEST $CLI volume tier $V0 detach force ++ ++sleep 3 ++ ++## Again performing attach-tier operation on volume $V0 ++TEST $CLI volume tier $V0 attach $H0:$B0/${V0}{5..6} ++ ++## Do detach-tier start on volume $V0 ++TEST $CLI volume tier $V0 detach start ++ ++## Now detach-tier commit on volume $V0 should succeed. ++## wait for the detach to complete ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_commit_for_single_node ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1517961 +diff --git a/tests/basic/tier/ctr-rename-overwrite.t b/tests/basic/tier/ctr-rename-overwrite.t +new file mode 100755 +index 0000000..73ee758 +--- /dev/null ++++ b/tests/basic/tier/ctr-rename-overwrite.t +@@ -0,0 +1,50 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../tier.rc
++
++LAST_BRICK=1
++CACHE_BRICK_FIRST=4
++CACHE_BRICK_LAST=5
++
++DEMOTE_FREQ=5
++PROMOTE_FREQ=5
++
++cleanup
++
++# Start glusterd
++TEST glusterd
++TEST pidof glusterd
++
++# Set-up tier cluster
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++TEST $CLI volume start $V0
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++
++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++
++# Start and mount the volume after enabling CTR
++TEST $CLI volume set $V0 features.ctr-enabled on
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# create two files
++echo "hello world" > $M0/file1
++echo "hello world" > $M0/file2
++
++# The db in the hot brick shows 4 records: 2 for file1 and 2 for file2
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++    sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 4 ]
++
++# overwrite file2 with file1
++mv -f $M0/file1 $M0/file2
++
++# Now the db in the hot tier should have only 2 records, both for file1.
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++    sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++cleanup
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/file_lock.c b/tests/basic/tier/file_lock.c
+new file mode 100644
+index 0000000..20fdbc0
+--- /dev/null
++++ b/tests/basic/tier/file_lock.c
+@@ -0,0 +1,72 @@
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <fcntl.h>
++#include <unistd.h>
++
++void
++usage(void)
++{
++    printf("Usage: testlock <file_path> [R|W]\n");
++    return;
++}
++
++int
++main(int argc, char *argv[])
++{
++    char *file_path = NULL;
++    int fd = -1;
++    struct flock lock = {0};
++    int ret = -1;
++    int c = 0;
++
++    if (argc != 3) {
++        usage();
++        exit(1);
++    }
++
++    file_path = argv[1];
++    fd = open(file_path, O_RDWR);
++
++    if (-1 == fd) {
++        printf("Failed to open file %s. %m\n", file_path);
++        exit(1);
++    }
++
++    /* TODO: Check for invalid input */
++
++    if (!strcmp(argv[2], "W")) {
++        lock.l_type = F_WRLCK;
++        printf("Taking write lock\n");
++
++    } else {
++        lock.l_type = F_RDLCK;
++        printf("Taking read lock\n");
++    }
++
++    lock.l_whence = SEEK_SET;
++    lock.l_start = 0;
++    lock.l_len = 0;
++    lock.l_pid = getpid();
++
++    printf("Acquiring lock on %s\n", file_path);
++    ret = fcntl(fd, F_SETLK, &lock);
++    if (ret) {
++        printf("Failed to acquire lock on %s (%m)\n", file_path);
++        close(fd);
++        exit(1);
++    }
++
++    sleep(10);
++
++    /* Unlock */
++
++    printf("Releasing lock on %s\n", file_path);
++    lock.l_type = F_UNLCK;
++    ret = fcntl(fd, F_SETLK, &lock);
++    if (ret) {
++        printf("Failed to release lock on %s (%m)\n", file_path);
++    }
++
++    close(fd);
++    return ret;
++}
+diff --git a/tests/basic/tier/file_with_spaces.t b/tests/basic/tier/file_with_spaces.t
+new file mode 100755
+index 0000000..919b900
+--- /dev/null
++++ b/tests/basic/tier/file_with_spaces.t
+@@ -0,0 +1,71 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++NUM_BRICKS=3
++DEMOTE_FREQ=5
++DEMOTE_TIMEOUT=10
++PROMOTE_FREQ=5
++
++FILE_SPACE="Testing filenames with spaces.log"
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
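++# A hypothetical helper, added here purely for illustration (the test below
++# calls `find` directly): note the quoting, which keeps names containing
++# spaces intact while probing the backend bricks.
++function tier_of_file () {
++    # print "hot" or "cold" depending on which tier currently holds the file
++    if [ -n "$(find $B0/hot/ -name "$1" 2>/dev/null)" ]; then
++        echo "hot"
++    else
++        echo "cold"
++    fi
++}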
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++ ++# The file will be created on the hot tier ++ ++touch "$M0/$FILE_SPACE" ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name "$FILE_SPACE"` ++echo "File path on hot tier: "$HPATH ++ ++EXPECT "yes" exists_and_regular_file $HPATH ++ ++# Wait for the tier process to demote the file ++sleep $DEMOTE_TIMEOUT ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name "$FILE_SPACE"` ++echo "File path on cold tier: "$CPATH ++ ++EXPECT "yes" exists_and_regular_file $CPATH ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/fops-during-migration-pause.t b/tests/basic/tier/fops-during-migration-pause.t +new file mode 100755 +index 0000000..46fc6e4 +--- /dev/null ++++ b/tests/basic/tier/fops-during-migration-pause.t +@@ -0,0 +1,89 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=10 ++PROMOTE_FREQ=10 ++ ++TEST_STR="Testing write and truncate fops on tier migration" ++ ++function is_sticky_set () { ++ echo $1 ++ if [ -k $1 ]; ++ then ++ echo "yes" ++ else ++ echo "no" ++ fi ++} ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
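++# Background for the sticky-bit checks below: while a file is under
++# migration, the tier translator (DHT) marks the source copy with the sticky
++# bit, so is_sticky_set reports "yes" from the start of rebalance until the
++# copy completes, e.g.:
++#     is_sticky_set $HPATH    # "yes" while FILE1 is still migrating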
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++TEST mkdir $M0/dir1 ++ ++# Create a large file (800MB), so that rebalance takes time ++# The file will be created on the hot tier ++sleep_until_mid_cycle $DEMOTE_FREQ ++dd if=/dev/zero of=$M0/dir1/FILE1 bs=256k count=5120 ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name FILE1` ++echo "File path on hot tier: "$HPATH ++ ++ ++# Wait for the tier process to demote the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH ++ ++TEST $CLI volume set $V0 cluster.tier-pause on ++ ++# Wait for the tier process to finish migrating the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "no" is_sticky_set $HPATH ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name FILE1` ++ ++# make sure destination is empty ++TEST ! test -s $CPATH ++ ++# make sure source exists and not empty ++TEST test -s $HPATH ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/fops-during-migration.t b/tests/basic/tier/fops-during-migration.t +new file mode 100755 +index 0000000..458c01e +--- /dev/null ++++ b/tests/basic/tier/fops-during-migration.t +@@ -0,0 +1,105 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=5 ++PROMOTE_FREQ=5 ++ ++TEST_STR="Testing write and truncate fops on tier migration" ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
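++# One difference from the pause test above: create_dist_tier_vol below also
++# turns on cluster.force-migration, which this test relies on so that the
++# migrator keeps moving a file even while a client is actively writing to it.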
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume set $V0 cluster.force-migration on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++ ++# Checks that the contents of the file matches the input string ++#$1 : file_path ++#$2 : comparison string ++ ++function check_file_content () { ++ contents=`cat $1` ++ echo $contents ++ if [ "$contents" = "$2" ]; then ++ echo "1" ++ else ++ echo "0" ++ fi ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++$CLI volume set $V0 diagnostics.client-log-level DEBUG ++ ++TEST mkdir $M0/dir1 ++ ++# Create a large file (320MB), so that rebalance takes time ++# The file will be created on the hot tier ++ ++dd if=/dev/zero of=$M0/dir1/FILE1 bs=64k count=5120 ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name FILE1` ++echo "File path on hot tier: "$HPATH ++ ++ ++# Wait for the tier process to demote the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name FILE1` ++echo "File path on cold tier: "$CPATH ++ ++# Test setxattr ++TEST setfattr -n "user.test_xattr" -v "qwerty" $M0/dir1/FILE1 ++ ++# Change the file contents while it is being migrated ++echo $TEST_STR > $M0/dir1/FILE1 ++ ++# The file contents should have changed even if the file ++# is not done migrating ++EXPECT "1" check_file_content $M0/dir1/FILE1 "$TEST_STR" ++ ++ ++# Wait for the tier process to finish migrating the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "no" is_sticky_set $CPATH ++ ++# The file contents should have changed ++EXPECT "1" check_file_content $M0/dir1/FILE1 "$TEST_STR" ++ ++ ++TEST getfattr -n "user.test_xattr" $M0/dir1/FILE1 ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/frequency-counters.t b/tests/basic/tier/frequency-counters.t +new file mode 100644 +index 0000000..08e05df +--- /dev/null ++++ b/tests/basic/tier/frequency-counters.t +@@ -0,0 +1,82 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=10 ++PROMOTE_FREQ=10 ++NUM_FILES=5 ++TEST_DIR=test ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
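++# How the thresholds below play out: with cluster.read-freq-threshold and
++# cluster.write-freq-threshold both set to 2, a file needs at least two
++# recorded accesses within a cycle before it becomes a promotion candidate,
++# roughly:
++#     cat file1                # one hit  -> no promotion expected
++#     cat file1; cat file1     # two hits -> promoted on the next cycle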
++
++function create_dist_vol () {
++    mkdir $B0/cold
++    mkdir $B0/hot
++    TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++    TEST $CLI volume set $V0 performance.quick-read off
++    TEST $CLI volume set $V0 performance.io-cache off
++    TEST $CLI volume start $V0
++}
++
++function create_dist_tier_vol () {
++    TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++    TEST $CLI volume set $V0 cluster.tier-mode test
++    TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++    TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++    TEST $CLI volume set $V0 features.record-counters on
++    TEST $CLI volume set $V0 cluster.read-freq-threshold 2
++    TEST $CLI volume set $V0 cluster.write-freq-threshold 2
++}
++
++cleanup;
++
++
++TEST glusterd
++
++#Create and start a volume; the tier is attached later
++create_dist_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++# create some files
++mkdir $M0/$TEST_DIR
++cd $M0/${TEST_DIR}
++
++date > file1
++touch file2
++
++# attach tier
++create_dist_tier_vol $NUM_BRICKS
++
++sleep_until_mid_cycle $PROMOTE_FREQ
++
++# check promotion on a single hit; it should fail
++date >> file2
++cat file1
++drop_cache $M0
++sleep $PROMOTE_FREQ
++EXPECT "0" check_counters 0 0
++
++# check promotion on a double hit; it should succeed
++sleep_until_mid_cycle $PROMOTE_FREQ
++date >> file2
++drop_cache $M0
++cat file1
++date >> file2
++drop_cache $M0
++cat file1
++
++EXPECT_WITHIN $PROMOTE_FREQ "0" check_counters 2 0
++
++TEST ! $CLI volume set $V0 features.record-counters off
++
++cd /
++
++cleanup
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/legacy-many.t b/tests/basic/tier/legacy-many.t
+new file mode 100644
+index 0000000..5795428
+--- /dev/null
++++ b/tests/basic/tier/legacy-many.t
+@@ -0,0 +1,92 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. 
$(dirname $0)/../../tier.rc ++ ++ ++LAST_BRICK=3 ++CACHE_BRICK_FIRST=4 ++CACHE_BRICK_LAST=5 ++DEMOTE_TIMEOUT=12 ++PROMOTE_TIMEOUT=12 ++MIGRATION_TIMEOUT=10 ++DEMOTE_FREQ=60 ++PROMOTE_FREQ=10 ++TEST_DIR="test_files" ++NUM_FILES=15 ++ ++function read_all { ++ for file in * ++ do ++ cat $file ++ done ++} ++ ++function tier_status () { ++ $CLI volume tier $V0 status | grep "success" | wc -l ++} ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++# Create distributed replica volume ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++TEST $CLI volume start $V0 ++ ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 features.ctr-enabled on ++ ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create a number of "legacy" files before attaching tier ++mkdir $M0/${TEST_DIR} ++cd $M0/${TEST_DIR} ++TEST create_many_files file $NUM_FILES ++wait ++ ++# Attach tier ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++ ++TEST $CLI volume set $V0 cluster.tier-mode test ++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ ++# wait a little for lookup heal to finish ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status ++ ++# make sure fix layout completed ++CPATH=$B0/${V0}0 ++echo $CPATH > /tmp/out ++TEST getfattr -n "trusted.tier.fix.layout.complete" $CPATH ++ ++# Read "legacy" files ++drop_cache $M0 ++ ++sleep_until_mid_cycle $DEMOTE_FREQ ++ ++TEST read_all ++ ++# Test to make sure files were promoted as expected ++sleep $PROMOTE_TIMEOUT ++EXPECT_WITHIN $PROMOTE_TIMEOUT "0" check_counters $NUM_FILES 0 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" detach_start $V0 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}${CACHE_BRICK_FIRST}" ++ ++TEST $CLI volume tier $V0 detach commit ++ ++# fix layout flag should be cleared ++TEST ! getfattr -n "trusted.tier.fix.layout.complete" $CPATH ++ ++cd; ++cleanup ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/locked_file_migration.t b/tests/basic/tier/locked_file_migration.t +new file mode 100755 +index 0000000..7fb1717 +--- /dev/null ++++ b/tests/basic/tier/locked_file_migration.t +@@ -0,0 +1,80 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=7 ++PROMOTE_FREQ=30 ++DEMOTE_TIMEOUT=15 ++ ++TEST_STR="Testing write and truncate fops on tier migration" ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
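++# This test uses the file_lock helper built from
++# tests/basic/tier/file_lock.c; its usage is `file_lock <file_path> [R|W]`
++# and it holds the requested fcntl (POSIX) lock for about ten seconds:
++#     ./file_lock $M0/dir1/FILE1 W &    # pin FILE1 with a write lock
++# The migrator skips files with active POSIX locks, which is the behaviour
++# verified below.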
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ ++#We don't want promotes to happen in this test ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 10 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 10 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++# Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++TEST mkdir $M0/dir1 ++build_tester $(dirname $0)/file_lock.c -o file_lock ++cp $(dirname $0)/file_lock $M0/file_lock ++ ++# The files will be created on the hot tier ++touch $M0/dir1/FILE1 ++touch $M0/dir1/FILE2 ++ ++# For FILE1, take a POSIX write lock on the entire file. ++# Don't take a lock on FILE2 ++ ++./file_lock $M0/dir1/FILE1 W & ++ ++sleep $DEMOTE_FREQ ++ ++# Wait for the tier process to demote the file ++# Only FILE2 and file_lock should be demoted ++# FILE1 should be skipped because of the lock held ++# on it ++ ++EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 0 2 ++ ++sleep 10 ++ ++rm $(dirname $0)/file_lock ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t +new file mode 100644 +index 0000000..b9c9390 +--- /dev/null ++++ b/tests/basic/tier/new-tier-cmds.t +@@ -0,0 +1,129 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++. $(dirname $0)/../../cluster.rc ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. ++ ++function check_peers { ++ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ ++function create_dist_tier_vol () { ++ TEST $CLI_1 volume create $V0 disperse 6 redundancy 2 $H1:$B1/${V0}_b1 $H2:$B2/${V0}_b2 $H3:$B3/${V0}_b3 $H1:$B1/${V0}_b4 $H2:$B2/${V0}_b5 $H3:$B3/${V0}_b6 ++ TEST $CLI_1 volume start $V0 ++ TEST $CLI_1 volume tier $V0 attach replica 2 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3 $H1:$B1/${V0}_h4 $H2:$B2/${V0}_h5 $H3:$B3/${V0}_h6 ++} ++ ++function tier_daemon_status { ++ local _VAR=CLI_$1 ++ local xpath_sel='//node[hostname="Tier Daemon"][path="localhost"]/status' ++ ${!_VAR} --xml volume status $V0 \ ++ | xmllint --xpath "$xpath_sel" - \ ++ | sed -n '/.*\([0-9]*\).*/s//\1/p' ++} ++ ++function detach_xml_status { ++ $CLI_1 volume tier $V0 detach status --xml | sed -n \ ++ '/.*Detach tier status successful/p' | wc -l ++} ++ ++cleanup; ++ ++#setup cluster and test volume ++TEST launch_cluster 3; # start 3-node virtual cluster ++TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli ++TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; ++ ++#Create and start a tiered volume ++create_dist_tier_vol ++ ++########### check failure for older commands ############# ++ ++TEST ! 
$CLI_1 volume rebalance $V0 tier status ++ ++# failure for older command can be removed in 3.11 ++ ++########################################################## ++ ++#Issue detach tier on the tiered volume ++#Will throw error saying detach tier not started ++ ++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status ++ ++EXPECT "0" detach_xml_status ++ ++#kill a node ++TEST kill_node 2 ++ ++#check if we have the rest of the node available printed in the output of detach status ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status_node_down ++ ++TEST $glusterd_2; ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; ++ ++#after starting detach tier the detach tier status should display the status ++sleep 2 ++$CLI_1 volume status ++TEST $CLI_1 volume tier $V0 detach start ++ ++EXPECT "1" detach_xml_status ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status ++ ++#kill a node ++TEST kill_node 2 ++ ++#check if we have the rest of the node available printed in the output of detach status ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status_node_down ++ ++TEST $glusterd_2; ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; ++# Make sure we check that the *bricks* are up and not just the node. >:-( ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_b2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_h2 ++ ++# Parsing normal output doesn't work because of line-wrap issues on our ++# regression machines, and the version of xmllint there doesn't support --xpath ++# so we can't do it that way either. In short, there's no way for us to detect ++# when we can stop waiting, so we just have to wait the maximum time every time ++# and hope any failures will show up later in the script. ++sleep $PROCESS_UP_TIMEOUT ++#XPECT_WITHIN $PROCESS_UP_TIMEOUT 1 tier_daemon_status 2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status ++ ++TEST $CLI_1 volume tier $V0 detach stop ++ ++#If detach tier is stopped the detach tier command will fail ++ ++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status ++ ++TEST $CLI_1 volume tier $V0 detach start ++ ++#wait for the detach to complete ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_commit ++ ++#If detach tier is committed then the detach status should fail throwing an error ++#saying its not a tiered volume ++ ++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status ++ ++########### check failure for older commands ############# ++ ++TEST ! $CLI_1 volume rebalance $V0 tier start ++ ++# failure for older command can be removed in 3.11 ++ ++########################################################## ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/readdir-during-migration.t b/tests/basic/tier/readdir-during-migration.t +new file mode 100644 +index 0000000..292ca88 +--- /dev/null ++++ b/tests/basic/tier/readdir-during-migration.t +@@ -0,0 +1,65 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=5 ++PROMOTE_FREQ=5 ++NUM_FILES=30 ++TEST_DIR=test ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
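++# The intent of the check_file_count helper defined below: while demotion
++# shuffles files between the tiers, a readdir on the mount must keep
++# returning exactly $NUM_FILES names, with nothing missing and no duplicate
++# entries leaking in from in-flight link files.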
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-mode test ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++} ++ ++function check_file_count() { ++ if [ $(ls -1 | wc -l) == $1 ]; then ++ echo "1" ++ else ++ echo "0" ++ fi ++} ++ ++cleanup; ++ ++ ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++# Create a number of "legacy" files before attaching tier ++mkdir $M0/${TEST_DIR} ++cd $M0/${TEST_DIR} ++TEST create_many_files tfile $NUM_FILES ++ ++EXPECT "1" check_file_count $NUM_FILES ++ ++sleep $DEMOTE_FREQ ++ ++EXPECT "1" check_file_count $NUM_FILES ++ ++cd / ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/record-metadata-heat.t b/tests/basic/tier/record-metadata-heat.t +new file mode 100755 +index 0000000..f6f35a8 +--- /dev/null ++++ b/tests/basic/tier/record-metadata-heat.t +@@ -0,0 +1,106 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=5 ++DEMOTE_TIMEOUT=10 ++PROMOTE_FREQ=5 ++ ++FILE="file1.txt" ++FILE_LINK="file2.txt" ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. ++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-mode test ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 4 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 4 ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++ ++# The file will be created on the hot tier ++touch "$M0/$FILE" ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name "$FILE"` ++echo "File path on hot tier: "$HPATH ++ ++############################################ ++# as per the changes on b8b050c3 ++# To test the xttr set by EC ++TEST ! 
getfattr -n "trusted.ec.size" $HPATH ++############################################ ++ ++# Expecting the file to be on the hot tier ++EXPECT "yes" exists_and_regular_file $HPATH ++ ++sleep_until_mid_cycle $DEMOTE_FREQ ++ ++# Try to heat the file using 5 metadata operations ++# WITHOUT setting ctr-record-metadata-heat on ++touch "$M0/$FILE" ++chmod +x "$M0/$FILE" ++chown root "$M0/$FILE" ++ln "$M0/$FILE" "$M0/$FILE_LINK" ++rm -rf "$M0/$FILE_LINK" ++ ++# Wait for the tier process to demote the file ++sleep $DEMOTE_TIMEOUT ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name "$FILE"` ++echo "File path on cold tier: "$CPATH ++ ++# Expecting the file to be on cold tier ++EXPECT "yes" exists_and_regular_file $CPATH ++ ++#Set ctr-record-metadata-heat on ++TEST $CLI volume set $V0 ctr-record-metadata-heat on ++ ++sleep_until_mid_cycle $DEMOTE_FREQ ++ ++# Heating the file using 5 metadata operations ++touch "$M0/$FILE" ++chmod +x "$M0/$FILE" ++chown root "$M0/$FILE" ++ln "$M0/$FILE" "$M0/$FILE_LINK" ++rm -rf "$M0/$FILE_LINK" ++ ++# Wait for the tier process to demote the file ++sleep $DEMOTE_TIMEOUT ++ ++# Get the path of the file on the hot tier ++echo "File path on hot tier: "$HPATH ++ ++# Expecting the file to be on the hot tier ++EXPECT "yes" exists_and_regular_file $HPATH ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/tier-heald.t b/tests/basic/tier/tier-heald.t +new file mode 100644 +index 0000000..a8e634f +--- /dev/null ++++ b/tests/basic/tier/tier-heald.t +@@ -0,0 +1,98 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++# This test contains volume heal commands handled by glusterd. ++# Covers enable/disable at the moment. Will be enhanced later to include ++# the other commands as well. ++ ++cleanup; ++TEST glusterd ++TEST pidof glusterd ++ ++volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol" ++ ++# Commands should fail when both tiers are not of distribute type. ++# Glustershd shouldn't be running as long as there are no replicate/disperse ++# volumes ++TEST $CLI volume create dist_tier $H0:$B0/cold ++TEST $CLI volume start dist_tier ++TEST $CLI volume tier dist_tier attach $H0:$B0/hot ++ ++TEST "[ -z $(get_shd_process_pid)]" ++TEST ! $CLI volume heal dist_tier enable ++TEST ! $CLI volume heal dist_tier disable ++ ++# Commands should work on replicate/disperse volume. ++TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1 ++TEST "[ -z $(get_shd_process_pid)]" ++TEST $CLI volume start r2 ++ ++TEST $CLI volume tier r2 attach $H0:$B0/r2_hot ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++TEST $CLI volume heal r2 enable ++EXPECT "enable" volume_option r2 "cluster.self-heal-daemon" ++EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++TEST $CLI volume heal r2 disable ++EXPECT "disable" volume_option r2 "cluster.self-heal-daemon" ++EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++# Commands should work on disperse volume. 
++TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2 ++TEST $CLI volume start ec2 ++ ++TEST $CLI volume tier ec2 attach replica 2 $H0:$B0/ec2_hot{1..4} ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++TEST $CLI volume heal ec2 enable ++EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon" ++EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++TEST $CLI volume heal ec2 disable ++EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon" ++EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++ ++#Check that shd graph is rewritten correctly on volume stop/start ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++TEST $CLI volume stop r2 ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++TEST $CLI volume stop ec2 ++# When both the volumes are stopped glustershd volfile is not modified just the ++# process is stopped ++TEST "[ -z $(get_shd_process_pid) ]" ++ ++TEST $CLI volume start r2 ++EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++ ++TEST $CLI volume start ec2 ++ ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate ++ ++TEST $CLI volume tier ec2 detach force ++ ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate ++ ++TEST $CLI volume set r2 self-heal-daemon on ++TEST $CLI volume set r2 cluster.self-heal-daemon off ++TEST ! $CLI volume set ec2 self-heal-daemon off ++TEST ! $CLI volume set ec2 cluster.self-heal-daemon on ++TEST ! $CLI volume set dist self-heal-daemon off ++TEST ! $CLI volume set dist cluster.self-heal-daemon on ++ ++TEST $CLI volume set ec2 disperse-self-heal-daemon off ++TEST $CLI volume set ec2 cluster.disperse-self-heal-daemon on ++TEST ! $CLI volume set r2 disperse-self-heal-daemon on ++TEST ! $CLI volume set r2 cluster.disperse-self-heal-daemon off ++TEST ! $CLI volume set dist disperse-self-heal-daemon off ++TEST ! $CLI volume set dist cluster.disperse-self-heal-daemon on ++ ++cleanup ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/tier-snapshot.t b/tests/basic/tier/tier-snapshot.t +new file mode 100644 +index 0000000..8747c5d +--- /dev/null ++++ b/tests/basic/tier/tier-snapshot.t +@@ -0,0 +1,47 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../snapshot.rc ++ ++cleanup; ++ ++TEST init_n_bricks 4; ++TEST setup_lvm 4; ++ ++TEST glusterd; ++ ++TEST $CLI volume create $V0 replica 2 $H0:$L1 $H0:$L2 ; ++ ++TEST $CLI volume start $V0; ++ ++TEST $CLI volume tier $V0 attach replica 2 $H0:$L3 $H0:$L4 ; ++ ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0; ++ ++for i in {1..10} ; do echo "file" > $M0/file$i ; done ++ ++TEST $CLI snapshot config activate-on-create enable ++ ++TEST $CLI snapshot create snap1 $V0 no-timestamp; ++ ++for i in {11..20} ; do echo "file" > $M0/file$i ; done ++ ++TEST $CLI snapshot create snap2 $V0 no-timestamp; ++ ++mkdir $M0/dir1; ++mkdir $M0/dir2; ++ ++for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done ++for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done ++ ++TEST $CLI snapshot create snap3 $V0 no-timestamp; ++ ++for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done ++for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done ++ ++TEST $CLI snapshot create snap4 $V0 no-timestamp; ++ ++TEST $CLI snapshot delete all; ++ ++cleanup; ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t +new file mode 100755 +index 0000000..1798541 +--- /dev/null ++++ b/tests/basic/tier/tier.t +@@ -0,0 +1,219 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++LAST_BRICK=3 ++CACHE_BRICK_FIRST=4 ++CACHE_BRICK_LAST=5 ++DEMOTE_TIMEOUT=12 ++PROMOTE_TIMEOUT=5 ++MIGRATION_TIMEOUT=10 ++DEMOTE_FREQ=4 ++PROMOTE_FREQ=12 ++ ++function file_on_slow_tier { ++ found=0 ++ ++ for i in `seq 0 $LAST_BRICK`; do ++ test -e "$B0/${V0}${i}/$1" && found=1 && break; ++ done ++ ++ if [ "$found" == "1" ] ++ then ++ slow_hash1=$2 ++ slow_hash2=$(fingerprint "$B0/${V0}${i}/$1") ++ ++ if [ "$slow_hash1" == "$slow_hash2" ] ++ then ++ echo "0" ++ else ++ echo "2" ++ fi ++ else ++ echo "1" ++ fi ++ ++ # temporarily disable non-Linux tests. ++ case $OSTYPE in ++ NetBSD | FreeBSD | Darwin) ++ echo "0" ++ ;; ++ esac ++} ++ ++function file_on_fast_tier { ++ found=0 ++ ++ for j in `seq $CACHE_BRICK_FIRST $CACHE_BRICK_LAST`; do ++ test -e "$B0/${V0}${j}/$1" && found=1 && break; ++ done ++ ++ ++ if [ "$found" == "1" ] ++ then ++ fast_hash1=$2 ++ fast_hash2=$(fingerprint "$B0/${V0}${j}/$1") ++ ++ if [ "$fast_hash1" == "$fast_hash2" ] ++ then ++ echo "0" ++ else ++ echo "2" ++ fi ++ else ++ echo "1" ++ fi ++} ++ ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++# testing bug 1215122, ie should fail if replica count and bricks are not compatible. ++ ++TEST ! $CLI volume tier $V0 attach replica 5 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++ ++TEST $CLI volume start $V0 ++ ++# The following two commands instigate a graph switch. Do them ++# before attaching the tier. If done on a tiered volume the rebalance ++# daemon will terminate and must be restarted manually. ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++ ++#Not a tier volume ++TEST ! $CLI volume set $V0 cluster.tier-demote-frequency 4 ++ ++#testing bug #1228112, glusterd crashed when trying to detach-tier commit force on a non-tiered volume. ++TEST ! 
$CLI volume tier $V0 detach commit force ++ ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++ ++TEST $CLI volume set $V0 cluster.tier-mode test ++ ++# create a file, make sure it can be deleted after attach tier. ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++cd $M0 ++TEST touch delete_me.txt ++TEST rm -f delete_me.txt ++ ++# confirm watermark CLI works ++TEST $CLI volume set $V0 cluster.watermark-hi 85 ++TEST $CLI volume set $V0 cluster.watermark-low 75 ++TEST $CLI volume set $V0 cluster.tier-max-mb 1000 ++TEST $CLI volume set $V0 cluster.tier-max-files 1000 ++TEST $CLI volume set $V0 cluster.tier-max-promote-file-size 1000 ++TEST ! $CLI volume set $V0 cluster.tier-max-files -3 ++TEST ! $CLI volume set $V0 cluster.watermark-low 90 ++TEST ! $CLI volume set $V0 cluster.watermark-hi 75 ++TEST ! $CLI volume set $V0 cluster.read-freq-threshold -12 ++TEST ! $CLI volume set $V0 cluster.write-freq-threshold -12 ++ ++#check for watermark reset ++TEST $CLI volume set $V0 cluster.watermark-low 10 ++TEST $CLI volume set $V0 cluster.watermark-hi 30 ++TEST ! $CLI volume reset $V0 cluster.watermark-low ++TEST $CLI volume reset $V0 cluster.watermark-hi ++TEST $CLI volume reset $V0 cluster.watermark-low ++ ++# stop the volume and restart it. The rebalance daemon should restart. ++cd /tmp ++umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++ ++wait_for_tier_start ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++cd $M0 ++ ++sleep_first_cycle $DEMOTE_FREQ ++$CLI volume tier $V0 status ++ ++#Tier options expect non-negative value ++TEST ! $CLI volume set $V0 cluster.tier-promote-frequency -1 ++ ++#Tier options expect non-negative value ++TEST ! $CLI volume set $V0 cluster.read-freq-threshold qwerty ++ ++ ++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ ++# Basic operations. ++TEST stat . ++TEST mkdir d1 ++TEST [ -d d1 ] ++TEST touch d1/file1 ++TEST mkdir d1/d2 ++TEST [ -d d1/d2 ] ++TEST find d1 ++mkdir /tmp/d1 ++ ++# Create a file. It should be on the fast tier. ++uuidgen > /tmp/d1/data.txt ++md5data=$(fingerprint /tmp/d1/data.txt) ++mv /tmp/d1/data.txt ./d1/data.txt ++ ++TEST file_on_fast_tier d1/data.txt $md5data ++ ++uuidgen > /tmp/d1/data2.txt ++md5data2=$(fingerprint /tmp/d1/data2.txt) ++cp /tmp/d1/data2.txt ./d1/data2.txt ++ ++#File with spaces and special characters. ++SPACE_FILE="file with spaces & $peci@l ch@r@cter$ @!@$%^$#@^^*&%$#$%.txt" ++ ++uuidgen > "/tmp/d1/$SPACE_FILE" ++md5space=$(fingerprint "/tmp/d1/$SPACE_FILE") ++mv "/tmp/d1/$SPACE_FILE" "./d1/$SPACE_FILE" ++ ++# Check auto-demotion on write new. ++sleep $DEMOTE_TIMEOUT ++ ++# Check auto-promotion on write append. 
++UUID=$(uuidgen) ++echo $UUID >> /tmp/d1/data2.txt ++md5data2=$(fingerprint /tmp/d1/data2.txt) ++ ++sleep_until_mid_cycle $DEMOTE_FREQ ++drop_cache $M0 ++ ++echo $UUID >> ./d1/data2.txt ++cat "./d1/$SPACE_FILE" ++ ++sleep $PROMOTE_TIMEOUT ++sleep $DEMOTE_FREQ ++EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 2 6 ++ ++# stop gluster, when it comes back info file should have tiered volume ++killall glusterd ++TEST glusterd ++ ++EXPECT "0" file_on_slow_tier d1/data.txt $md5data ++EXPECT "0" file_on_slow_tier d1/data2.txt $md5data2 ++EXPECT "0" file_on_slow_tier "./d1/$SPACE_FILE" $md5space ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" detach_start $V0 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}${CACHE_BRICK_FIRST}" ++ ++TEST $CLI volume tier $V0 detach commit ++ ++EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK_FIRST} ++ ++confirm_vol_stopped $V0 ++ ++cd; ++ ++cleanup ++rm -rf /tmp/d1 ++ ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/tier_lookup_heal.t b/tests/basic/tier/tier_lookup_heal.t +new file mode 100755 +index 0000000..c7c7f27 +--- /dev/null ++++ b/tests/basic/tier/tier_lookup_heal.t +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++LAST_BRICK=1 ++CACHE_BRICK_FIRST=2 ++CACHE_BRICK_LAST=3 ++PROMOTE_TIMEOUT=5 ++ ++function file_on_fast_tier { ++ local ret="1" ++ ++ s1=$(md5sum $1) ++ s2=$(md5sum $B0/${V0}${CACHE_BRICK_FIRST}/$1) ++ ++ if [ -e $B0/${V0}${CACHE_BRICK_FIRST}/$1 ] && ! [ "$s1" == "$s2" ]; then ++ echo "0" ++ else ++ echo "1" ++ fi ++} ++ ++cleanup ++ ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create files before CTR xlator is on. ++cd $M0 ++TEST stat . ++TEST touch file1 ++TEST stat file1 ++ ++#Attach tier and switch ON CTR Xlator. ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++TEST $CLI volume set $V0 features.ctr-enabled on ++TEST $CLI volume set $V0 cluster.tier-demote-frequency 4 ++TEST $CLI volume set $V0 cluster.tier-promote-frequency 4 ++TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 cluster.tier-mode test ++ ++#The lookup should heal the database. ++TEST ls file1 ++ ++# gf_file_tb and gf_flink_tb should NOT be empty ++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \ ++ sqlite3 $B0/${V0}$LAST_BRICK/.glusterfs/${V0}$LAST_BRICK.db | wc -l ) ++TEST [ $ENTRY_COUNT -eq 2 ] ++ ++# Heat-up the file ++uuidgen > file1 ++sleep 5 ++ ++#Check if the file is promoted ++EXPECT_WITHIN $PROMOTE_TIMEOUT "0" file_on_fast_tier file1 ++ ++cd; ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/tierd_check.t b/tests/basic/tier/tierd_check.t +new file mode 100644 +index 0000000..5701fa9 +--- /dev/null ++++ b/tests/basic/tier/tierd_check.t +@@ -0,0 +1,128 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++. $(dirname $0)/../../cluster.rc ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
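++# Conventions used below (from cluster.rc): launch_cluster 3 starts three
++# glusterd instances, and $CLI_1..$CLI_3, $H1..$H3 and $B1..$B3 address each
++# node, so e.g. `$CLI_3 volume tier $V0 start force` goes through node 3.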
++ ++function check_peers { ++ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ ++function create_dist_tier_vol () { ++ TEST $CLI_1 volume create $V0 $H1:$B1/${V0} $H2:$B2/${V0} ++ TEST $CLI_1 volume start $V0 ++ TEST $CLI_1 volume tier $V0 attach $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 ++} ++ ++function tier_status () { ++ #$CLI_1 volume tier $V0 status | grep progress | wc -l ++ # I don't want to disable the entire test, but this part of it seems ++ # highly suspect. *Why* do we always expect the number of lines to be ++ # exactly two? What would it mean for it to be otherwise? Are we ++ # checking *correctness* of the result, or merely its *consistency* ++ # with what was observed at some unspecified time in the past? Does ++ # this check only serve to inhibit actual improvements? Until someone ++ # can answer these questions and explain why a hard-coded "2" is less ++ # arbitrary than what was here before, we might as well disable this ++ # part of the test. ++ echo "2" ++} ++ ++function tier_daemon_kill () { ++pkill -f "tierd/$V0" ++echo "$?" ++} ++ ++cleanup; ++ ++#setup cluster and test volume ++TEST launch_cluster 3; # start 3-node virtual cluster ++TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli ++TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; ++ ++#Create and start a tiered volume ++create_dist_tier_vol ++ ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_check ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_kill ++ ++TEST $CLI_1 volume tier $V0 start ++ ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_kill ++ ++TEST $CLI_3 volume tier $V0 start force ++ ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check ++ ++#The pattern progress should occur twice only. ++#it shouldn't come up on the third node without tierd even ++#after the tier start force is issued on the node without ++#tierd ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++#kill the node on which tier is not supposed to run ++TEST kill_node 3 ++ ++#bring the node back, it should not have tierd running on it ++TEST $glusterd_3; ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++#after volume restart, check for tierd ++ ++TEST $CLI_3 volume stop $V0 ++ ++TEST $CLI_3 volume start $V0 ++ ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++#check for detach start and stop ++ ++TEST $CLI_3 volume tier $V0 detach start ++ ++TEST $CLI_3 volume tier $V0 detach stop ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++TEST $CLI_1 volume tier $V0 start force ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check ++ ++# To test for detach start fail while the brick is down ++ ++TEST pkill -f "$B1/$V0" ++ ++TEST ! $CLI_1 volume tier $V0 detach start ++ ++cleanup ++# This test isn't worth keeping. Besides the totally arbitrary tier_status ++# checks mentioned above, someone direct-coded pkill to kill bricks instead of ++# using the volume.rc function we already had. I can't be bothered fixing that, ++# and the next thing, and the next thing, unless there's a clear benefit to ++# doing so, and AFAICT the success or failure of this test tells us nothing ++# useful. Therefore, it's disabled until further notice. 
++#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/unlink-during-migration.t b/tests/basic/tier/unlink-during-migration.t +new file mode 100755 +index 0000000..1330092 +--- /dev/null ++++ b/tests/basic/tier/unlink-during-migration.t +@@ -0,0 +1,92 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++DEMOTE_FREQ=5 ++PROMOTE_FREQ=5 ++ ++function create_dist_rep_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 replica 2 $H0:$B0/cold/${V0}{0..3} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++} ++ ++function attach_dist_rep_tier () { ++ TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/hot/${V0}{0..3} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++#Create and start a volume ++create_dist_rep_vol ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++# Create a large file (320MB), so that rebalance takes time ++TEST dd if=/dev/zero of=$M0/foo bs=64k count=5120 ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name foo` ++echo "File path on cold tier: "$CPATH ++ ++#Now attach the tier ++attach_dist_rep_tier ++ ++#Write into the file to promote it ++echo "good morning">>$M0/foo ++ ++# Wait for the tier process to promote the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $CPATH ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name foo` ++ ++echo "File path on hot tier: "$HPATH ++TEST rm -rf $M0/foo ++TEST ! stat $HPATH ++TEST ! stat $CPATH ++ ++#unlink during demotion ++HPATH=""; ++CPATH=""; ++ ++# Create a large file (320MB), so that rebalance takes time ++TEST dd if=/dev/zero of=$M0/foo1 bs=64k count=5120 ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name foo1` ++echo "File path on hot tier : "$HPATH ++ ++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name foo1` ++echo "File path on cold tier : "$CPATH ++ ++TEST rm -rf $M0/foo1 ++ ++TEST ! stat $HPATH ++TEST ! stat $CPATH ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t b/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t +new file mode 100644 +index 0000000..3b62a45 +--- /dev/null ++++ b/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t +@@ -0,0 +1,78 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
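++# The core assertion of this test in prose: rebalance_run_time samples the
++# tier daemon's elapsed run time, and if glusterd re-establishes its RPC
++# connection to the daemon after a restart, two samples taken a couple of
++# seconds apart must both be non-zero and their difference positive.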
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{1..3} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{1..2} ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++function non_zero_check () { ++ if [ "$1" -ne 0 ] ++ then ++ echo "0" ++ else ++ echo "1" ++ fi ++} ++ ++function num_bricks_up { ++ local b ++ local n_up=0 ++ ++ for b in $B0/hot/${V0}{1..2} $B0/cold/${V0}{1..3}; do ++ if [ x"$(brick_up_status $V0 $H0 $b)" = x"1" ]; then ++ n_up=$((n_up+1)) ++ fi ++ done ++ ++ echo $n_up ++} ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume status ++ ++ ++#Create and start a tiered volume ++create_dist_tier_vol ++# Wait for the bricks to come up, *then* the tier daemon. ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 5 num_bricks_up ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_check ++sleep 5 #wait for some time to run tier daemon ++time_before_restarting=$(rebalance_run_time $V0); ++ ++#checking for elapsed time after sleeping for two seconds. ++EXPECT "0" non_zero_check $time_before_restarting; ++ ++#Difference of elapsed time should be positive ++ ++kill -9 $(pidof glusterd); ++TEST glusterd; ++sleep 2; ++# Wait for the bricks to come up, *then* the tier daemon. ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 5 num_bricks_up ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check; ++sleep 1; ++time1=$(rebalance_run_time $V0); ++EXPECT "0" non_zero_check $time1; ++sleep 2; ++time2=$(rebalance_run_time $V0); ++EXPECT "0" non_zero_check $time2; ++diff=`expr $time2 - $time1` ++EXPECT "0" non_zero_check $diff; +diff --git a/tests/bugs/quota/bug-1288474.t b/tests/bugs/quota/bug-1288474.t +new file mode 100755 +index 0000000..b8f4ba3 +--- /dev/null ++++ b/tests/bugs/quota/bug-1288474.t +@@ -0,0 +1,51 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++NUM_BRICKS=2 ++ ++function create_dist_tier_vol () { ++ mkdir -p $B0/cold/${V0}{0..$1} ++ mkdir -p $B0/hot/${V0}{0..$1} ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 nfs.disable false ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++} ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++touch $M0/foobar ++ ++TEST $CLI volume quota $V0 enable ++TEST $CLI volume quota $V0 limit-usage / 10MB ++ ++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5 ++ ++#check quota list after detach tier ++TEST $CLI volume tier $V0 detach start ++sleep 1 ++TEST $CLI volume tier $V0 detach force ++ ++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5 ++ ++#check quota list after attach tier ++rm -rf $B0/hot ++mkdir $B0/hot ++TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ ++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5 ++ ++TEST umount $M0 ++ ++cleanup; ++ +diff --git a/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t b/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t +new file mode 100644 +index 0000000..9863834 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t +@@ -0,0 +1,53 @@ ++#!/bin/bash ++#Self-heal tests ++ ++. 
$(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $CLI volume set $V0 entry-self-heal off ++TEST $CLI volume set $V0 metadata-self-heal off ++TEST $CLI volume set $V0 data-self-heal off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume start $V0 ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/brick{2,3} ++TEST $CLI volume bitrot $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count ++TEST $CLI volume bitrot $V0 scrub-frequency hourly ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++TEST dd if=/dev/urandom of=$M0/FILE bs=1024 count=1 ++ ++#Corrupt file from back-end ++TEST stat $B0/brick3/FILE ++echo "Corrupted data" >> $B0/brick3/FILE ++#Manually set bad-file xattr since we can't wait for an hour. ++TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/brick3/FILE ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status'; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count ++#Trigger lookup so that bitrot xlator marks file as bad in its inode context. ++stat $M0/FILE ++# Remove hot-tier ++TEST $CLI volume tier $V0 detach start ++sleep 1 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" detach_tier_status_field_complete $V0 ++TEST $CLI volume tier $V0 detach commit ++#Test that file has migrated to cold tier. ++EXPECT "1024" stat -c "%s" $B0/brick0/FILE ++EXPECT "1024" stat -c "%s" $B0/brick1/FILE ++TEST umount $M0 ++cleanup +diff --git a/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t b/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t +new file mode 100644 +index 0000000..b2d382a +--- /dev/null ++++ b/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t +@@ -0,0 +1,72 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc
++
++LAST_BRICK=3
++CACHE_BRICK_FIRST=4
++CACHE_BRICK_LAST=5
++
++cleanup
++
++# Start glusterd [1-2]
++TEST glusterd
++TEST pidof glusterd
++
++# Set-up tier cluster [3-4]
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++TEST $CLI volume start $V0
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++
++# Start and mount the volume after enabling CTR and trash [5-8]
++TEST $CLI volume set $V0 features.ctr-enabled on
++TEST $CLI volume set $V0 features.trash on
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Create an empty file
++touch $M0/foo
++
++# gf_file_tb and gf_flink_tb should contain one entry each [9]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++    sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++# Create two hard links
++ln $M0/foo $M0/lnk1
++ln $M0/foo $M0/lnk2
++
++# Now gf_flink_tb should contain 3 entries [10]
++ENTRY_COUNT=$(echo "select * from gf_flink_tb;" | \
++    sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 3 ]
++
++# Delete the hard link
++rm -rf $M0/lnk1
++
++# Corresponding hard link entry must be removed from gf_flink_tb
++# but gf_file_tb should still contain the file entry [11]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++    sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 3 ]
++
++# Remove the file
++rm -rf $M0/foo
++
++# Another hard link entry is removed [12]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++    sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++# Remove the last hardlink
++rm -rf $M0/lnk2
++
++# All entries must be removed from gf_flink_tb and gf_file_tb [13]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++    sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 0 ]
++
++cleanup
++
++
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/bugs/tier/bug-1279376-rename-demoted-file.t b/tests/bugs/tier/bug-1279376-rename-demoted-file.t
+new file mode 100755
+index 0000000..c4a50d9
+--- /dev/null
++++ b/tests/bugs/tier/bug-1279376-rename-demoted-file.t
+@@ -0,0 +1,93 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++NUM_BRICKS=2
++DEMOTE_FREQ=15
++DEMOTE_TIMEOUT=10
++PROMOTE_FREQ=500
++
++
++#Both src and dst files must hash to the same hot tier subvol
++SRC_FILE="file1.txt"
++DST_FILE="newfile1.txt"
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
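++# Note on the SRC_FILE/DST_FILE constraint above: because both names hash to
++# the same hot-tier subvolume, the rename below stays within a single DHT
++# subvolume, so the final listing check can insist on exactly one visible
++# entry for the renamed file.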
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-mode test ++ ++#We do not want any files to be promoted during this test ++ TEST $CLI volume set $V0 features.record-counters on ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 50 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 50 ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++ ++# The file will be created on the hot tier ++ ++TEST touch "$M0/$SRC_FILE" ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name "$SRC_FILE"` ++echo "File path on hot tier: "$HPATH ++ ++ ++EXPECT "yes" exists_and_regular_file $HPATH ++ ++# Wait for the tier process to demote the file ++sleep $DEMOTE_FREQ ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name "$SRC_FILE"` ++echo "File path on cold tier: "$CPATH ++ ++EXPECT_WITHIN $DEMOTE_TIMEOUT "yes" exists_and_regular_file $CPATH ++ ++#We don't want $DST_FILE to get demoted ++TEST $CLI volume set $V0 cluster.tier-demote-frequency $PROMOTE_FREQ ++ ++#This will be created on the hot tier ++ ++touch "$M0/$DST_FILE" ++HPATH=`find $B0/hot/ -name "$DST_FILE"` ++echo "File path on hot tier: "$HPATH ++ ++TEST mv $M0/$SRC_FILE $M0/$DST_FILE ++ ++# We expect a single file to exist at this point ++# when viewed on the mountpoint ++EXPECT 1 echo $(ls -l $M0 | grep $DST_FILE | wc -l) ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index b7c7bd9..ed24858 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -1859,6 +1859,78 @@ out: + return ret; + } + ++#if USE_GFDB /* only add changetimerecorder when GFDB is enabled */ ++static int ++brick_graph_add_changetimerecorder(volgen_graph_t *graph, ++ glusterd_volinfo_t *volinfo, ++ dict_t *set_dict, ++ glusterd_brickinfo_t *brickinfo) ++{ ++ xlator_t *xl = NULL; ++ int ret = -1; ++ char *brickname = NULL; ++ char *path = NULL; ++ char index_basepath[PATH_MAX] = {0}; ++ char *hotbrick = NULL; ++ ++ if (!graph || !volinfo || !set_dict || !brickinfo) ++ goto out; ++ ++ path = brickinfo->path; ++ ++ xl = volgen_graph_add(graph, "features/changetimerecorder", ++ volinfo->volname); ++ if (!xl) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "db-type", "sqlite3"); ++ if (ret) ++ goto out; ++ ++ if (!set_dict || dict_get_str(set_dict, "hot-brick", &hotbrick)) ++ hotbrick = "off"; ++ ++ ret = xlator_set_fixed_option(xl, "hot-brick", hotbrick); ++ if (ret) ++ goto out; ++ ++ brickname = strrchr(path, '/') + 1; ++ snprintf(index_basepath, sizeof(index_basepath), "%s.db", brickname); ++ ret = xlator_set_fixed_option(xl, "db-name", index_basepath); ++ if (ret) ++ goto out; ++ ++ snprintf(index_basepath, sizeof(index_basepath), "%s/%s", path, ++ ".glusterfs/"); ++ ret = 
xlator_set_fixed_option(xl, "db-path", index_basepath); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "record-exit", "off"); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "ctr_link_consistency", "off"); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "ctr_lookupheal_link_timeout", "300"); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "ctr_lookupheal_inode_timeout", "300"); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "record-entry", "on"); ++ if (ret) ++ goto out; ++ ++out: ++ return ret; ++} ++#endif /* USE_GFDB */ ++ + static int + brick_graph_add_acl(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +@@ -2615,6 +2687,9 @@ static volgen_brick_xlator_t server_graph_table[] = { + {brick_graph_add_acl, "acl"}, + {brick_graph_add_bitrot_stub, "bitrot-stub"}, + {brick_graph_add_changelog, "changelog"}, ++#if USE_GFDB /* changetimerecorder depends on gfdb */ ++ {brick_graph_add_changetimerecorder, "changetimerecorder"}, ++#endif + {brick_graph_add_bd, "bd"}, + {brick_graph_add_trash, "trash"}, + {brick_graph_add_arbiter, "arbiter"}, +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index c8f6e67..a877805 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -11,6 +11,474 @@ cases as published by the Free Software Foundation. + #include "glusterd-volgen.h" + #include "glusterd-utils.h" + ++#if USE_GFDB /* no GFDB means tiering is disabled */ ++ ++static int ++get_tier_freq_threshold(glusterd_volinfo_t *volinfo, char *threshold_key) ++{ ++ int threshold = 0; ++ char *str_thresold = NULL; ++ int ret = -1; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ glusterd_volinfo_get(volinfo, threshold_key, &str_thresold); ++ if (str_thresold) { ++ ret = gf_string2int(str_thresold, &threshold); ++ if (ret == -1) { ++ threshold = ret; ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "Failed to convert " ++ "string to integer"); ++ } ++ } ++ ++ return threshold; ++} ++ ++/* ++ * Validation function for record-counters ++ * if write-freq-threshold and read-freq-threshold both have non-zero values ++ * record-counters cannot be set to off ++ * if record-counters is set to on ++ * check if both the frequency thresholds are zero, then pop ++ * a note, but volume set is not failed. ++ * */ ++static int ++validate_tier_counters(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ++ char *value, char **op_errstr) ++{ ++ char errstr[2048] = ""; ++ int ret = -1; ++ xlator_t *this = NULL; ++ gf_boolean_t origin_val = -1; ++ int current_wt = 0; ++ int current_rt = 0; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ snprintf(errstr, sizeof(errstr), ++ "Volume %s is not a tier " ++ "volume. Option %s is only valid for tier volume.", ++ volinfo->volname, key); ++ goto out; ++ } ++ ++ ret = gf_string2boolean(value, &origin_val); ++ if (ret) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a compatible " ++ "value. 
%s expects an boolean value", ++ value, key); ++ goto out; ++ } ++ ++ current_rt = get_tier_freq_threshold(volinfo, ++ "cluster.read-freq-threshold"); ++ if (current_rt == -1) { ++ snprintf(errstr, sizeof(errstr), ++ " Failed to retrieve value" ++ " of cluster.read-freq-threshold"); ++ goto out; ++ } ++ current_wt = get_tier_freq_threshold(volinfo, ++ "cluster.write-freq-threshold"); ++ if (current_wt == -1) { ++ snprintf(errstr, sizeof(errstr), ++ " Failed to retrieve value " ++ "of cluster.write-freq-threshold"); ++ goto out; ++ } ++ /* If record-counters is set to off */ ++ if (!origin_val) { ++ /* Both the thresholds should be zero to set ++ * record-counters to off*/ ++ if (current_rt || current_wt) { ++ snprintf(errstr, sizeof(errstr), ++ "Cannot set features.record-counters to \"%s\"" ++ " as cluster.write-freq-threshold is %d" ++ " and cluster.read-freq-threshold is %d. Please" ++ " set both cluster.write-freq-threshold and " ++ " cluster.read-freq-threshold to 0, to set " ++ " features.record-counters to \"%s\".", ++ value, current_wt, current_rt, value); ++ ret = -1; ++ goto out; ++ } ++ } ++ /* TODO give a warning message to the user. errstr without re = -1 will ++ * not result in a warning on cli for now. ++ else { ++ if (!current_rt && !current_wt) { ++ snprintf (errstr, sizeof (errstr), ++ " Note : cluster.write-freq-threshold is %d" ++ " and cluster.read-freq-threshold is %d. Please" ++ " set both cluster.write-freq-threshold and " ++ " cluster.read-freq-threshold to" ++ " appropriate positive values.", ++ current_wt, current_rt); ++ } ++ }*/ ++ ++ ret = 0; ++out: ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ } ++ ++ return ret; ++} ++ ++/* ++ * Validation function for ctr sql params ++ * features.ctr-sql-db-cachesize (Range: 1000 to 262144 pages) ++ * features.ctr-sql-db-wal-autocheckpoint (Range: 1000 to 262144 pages) ++ * */ ++static int ++validate_ctr_sql_params(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ++ char *value, char **op_errstr) ++{ ++ int ret = -1; ++ xlator_t *this = NULL; ++ char errstr[2048] = ""; ++ int origin_val = -1; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ ret = gf_string2int(value, &origin_val); ++ if (ret) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a compatible " ++ "value. %s expects an integer value.", ++ value, key); ++ ret = -1; ++ goto out; ++ } ++ ++ if (origin_val < 0) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a " ++ "compatible value. %s expects a positive" ++ "integer value.", ++ value, key); ++ ret = -1; ++ goto out; ++ } ++ ++ if (strstr(key, "sql-db-cachesize") || ++ strstr(key, "sql-db-wal-autocheckpoint")) { ++ if ((origin_val < 1000) || (origin_val > 262144)) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a " ++ "compatible value. 
%s " ++ "expects a value between : " ++ "1000 to 262144.", ++ value, key); ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ } ++ return ret; ++} ++ ++/* Validation for tiering frequency thresholds ++ * If any of the frequency thresholds are set to a non-zero value, ++ * switch record-counters on, if not already on ++ * If both the frequency thresholds are set to zero, ++ * switch record-counters off, if not already off ++ * */ ++static int ++validate_tier_thresholds(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ++ char *value, char **op_errstr) ++{ ++ char errstr[2048] = ""; ++ int ret = -1; ++ xlator_t *this = NULL; ++ int origin_val = -1; ++ gf_boolean_t current_rc = _gf_false; ++ int current_wt = 0; ++ int current_rt = 0; ++ gf_boolean_t is_set_rc = _gf_false; ++ char *proposed_rc = NULL; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ snprintf(errstr, sizeof(errstr), ++ "Volume %s is not a tier " ++ "volume. Option %s is only valid for tier volume.", ++ volinfo->volname, key); ++ goto out; ++ } ++ ++ ret = gf_string2int(value, &origin_val); ++ if (ret) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a compatible " ++ "value. %s expects an integer value.", ++ value, key); ++ ret = -1; ++ goto out; ++ } ++ ++ if (origin_val < 0) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a " ++ "compatible value. %s expects a positive" ++ "integer value.", ++ value, key); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Get the record-counters value */ ++ ret = glusterd_volinfo_get_boolean(volinfo, "features.record-counters"); ++ if (ret == -1) { ++ snprintf(errstr, sizeof(errstr), ++ "Failed to retrieve value of" ++ "features.record-counters from volume info"); ++ goto out; ++ } ++ current_rc = ret; ++ ++ /* if any of the thresholds are set to a non-zero value ++ * switch record-counters on, if not already on*/ ++ if (origin_val > 0) { ++ if (!current_rc) { ++ is_set_rc = _gf_true; ++ current_rc = _gf_true; ++ } ++ } else { ++ /* if the set is for write-freq-threshold */ ++ if (strstr(key, "write-freq-threshold")) { ++ current_rt = get_tier_freq_threshold(volinfo, ++ "cluster.read-freq-threshold"); ++ if (current_rt == -1) { ++ snprintf(errstr, sizeof(errstr), ++ " Failed to retrieve value of" ++ "cluster.read-freq-threshold"); ++ goto out; ++ } ++ current_wt = origin_val; ++ } ++ /* else it should be read-freq-threshold */ ++ else { ++ current_wt = get_tier_freq_threshold( ++ volinfo, "cluster.write-freq-threshold"); ++ if (current_wt == -1) { ++ snprintf(errstr, sizeof(errstr), ++ " Failed to retrieve value of" ++ "cluster.write-freq-threshold"); ++ goto out; ++ } ++ current_rt = origin_val; ++ } ++ ++ /* Since both the thresholds are zero, set record-counters ++ * to off, if not already off */ ++ if (current_rt == 0 && current_wt == 0) { ++ if (current_rc) { ++ is_set_rc = _gf_true; ++ current_rc = _gf_false; ++ } ++ } ++ } ++ ++ /* if record-counter has to be set to proposed value */ ++ if (is_set_rc) { ++ if (current_rc) { ++ ret = gf_asprintf(&proposed_rc, "on"); ++ } else { ++ ret = gf_asprintf(&proposed_rc, "off"); ++ } ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "Failed to allocate memory to dict_value"); ++ goto error; ++ } ++ ret = dict_set_str(volinfo->dict, "features.record-counters", ++ proposed_rc); ++ error: ++ if (ret) { ++ 
snprintf(errstr, sizeof(errstr), ++ "Failed to set features.record-counters" ++ "to \"%s\" automatically." ++ "Please try to set features.record-counters " ++ "\"%s\" manually. The options " ++ "cluster.write-freq-threshold and " ++ "cluster.read-freq-threshold can only " ++ "be set to a non zero value, if " ++ "features.record-counters is " ++ "set to \"on\".", ++ proposed_rc, proposed_rc); ++ goto out; ++ } ++ } ++ ret = 0; ++out: ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ if (proposed_rc) ++ GF_FREE(proposed_rc); ++ } ++ return ret; ++} ++ ++static int ++validate_tier(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, ++ char **op_errstr) ++{ ++ char errstr[2048] = ""; ++ int ret = 0; ++ xlator_t *this = NULL; ++ int origin_val = -1; ++ char *current_wm_hi = NULL; ++ char *current_wm_low = NULL; ++ uint64_t wm_hi = 0; ++ uint64_t wm_low = 0; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ snprintf(errstr, sizeof(errstr), ++ "Volume %s is not a tier " ++ "volume. Option %s is only valid for tier volume.", ++ volinfo->volname, key); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ ++ if (strstr(key, "cluster.tier-mode")) { ++ if (strcmp(value, "test") && strcmp(value, "cache")) { ++ ret = -1; ++ goto out; ++ } ++ goto out; ++ } else if (strstr(key, "tier-pause")) { ++ if (strcmp(value, "off") && strcmp(value, "on")) { ++ ret = -1; ++ goto out; ++ } ++ goto out; ++ } else if (strstr(key, "tier-compact")) { ++ if (strcmp(value, "on") && strcmp(value, "off")) { ++ ret = -1; ++ goto out; ++ } ++ ++ goto out; ++ } ++ ++ /* ++ * Rest of the volume set options for tier are expecting a positive ++ * Integer. Change the function accordingly if this constraint is ++ * changed. ++ */ ++ ret = gf_string2int(value, &origin_val); ++ if (ret) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a compatible " ++ "value. %s expects an integer value.", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ ++ if (strstr(key, "watermark-hi") || strstr(key, "watermark-low")) { ++ if ((origin_val < 1) || (origin_val > 99)) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a " ++ "compatible value. 
%s expects a " ++ "percentage from 1-99.", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ ++ if (strstr(key, "watermark-hi")) { ++ wm_hi = origin_val; ++ } else { ++ glusterd_volinfo_get(volinfo, "cluster.watermark-hi", ++ ¤t_wm_hi); ++ gf_string2bytesize_uint64(current_wm_hi, &wm_hi); ++ } ++ ++ if (strstr(key, "watermark-low")) { ++ wm_low = origin_val; ++ } else { ++ glusterd_volinfo_get(volinfo, "cluster.watermark-low", ++ ¤t_wm_low); ++ gf_string2bytesize_uint64(current_wm_low, &wm_low); ++ } ++ if (wm_low >= wm_hi) { ++ snprintf(errstr, sizeof(errstr), ++ "lower watermark" ++ " cannot be equal or exceed upper " ++ "watermark."); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ } else if (strstr(key, "tier-promote-frequency") || ++ strstr(key, "tier-max-mb") || ++ strstr(key, "tier-max-promote-file-size") || ++ strstr(key, "tier-max-files") || ++ strstr(key, "tier-demote-frequency") || ++ strstr(key, "tier-hot-compact-frequency") || ++ strstr(key, "tier-cold-compact-frequency") || ++ strstr(key, "tier-query-limit")) { ++ if (origin_val < 1) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a " ++ " compatible value. %s expects a positive " ++ "integer value greater than 0.", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ } ++out: ++ gf_msg_debug(this->name, 0, "Returning %d", ret); ++ ++ return ret; ++} ++ ++#endif /* End for USE_GFDB */ ++ + static int + validate_cache_max_min_size(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +@@ -2485,6 +2953,261 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "/var/run/gluster/shared_storage on enabling this " + "option. Unmount and delete the shared storage volume " + " on disabling this option."}, ++#if USE_GFDB /* no GFDB means tiering is disabled */ ++ /* tier translator - global tunables */ ++ {.key = "cluster.write-freq-threshold", ++ .voltype = "cluster/tier", ++ .value = "0", ++ .option = "write-freq-threshold", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier_thresholds, ++ .description = "Defines the number of writes, in a promotion/demotion" ++ " cycle, that would mark a file HOT for promotion. Any" ++ " file that has write hits less than this value will " ++ "be considered as COLD and will be demoted."}, ++ {.key = "cluster.read-freq-threshold", ++ .voltype = "cluster/tier", ++ .value = "0", ++ .option = "read-freq-threshold", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier_thresholds, ++ .description = "Defines the number of reads, in a promotion/demotion " ++ "cycle, that would mark a file HOT for promotion. 
Any " ++ "file that has read hits less than this value will be " ++ "considered as COLD and will be demoted."}, ++ { ++ .key = "cluster.tier-pause", ++ .voltype = "cluster/tier", ++ .option = "tier-pause", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ { ++ .key = "cluster.tier-promote-frequency", ++ .voltype = "cluster/tier", ++ .value = "120", ++ .option = "tier-promote-frequency", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ { ++ .key = "cluster.tier-demote-frequency", ++ .voltype = "cluster/tier", ++ .value = "3600", ++ .option = "tier-demote-frequency", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ {.key = "cluster.watermark-hi", ++ .voltype = "cluster/tier", ++ .value = "90", ++ .option = "watermark-hi", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = ++ "Upper % watermark for promotion. If hot tier fills" ++ " above this percentage, no promotion will happen and demotion will " ++ "happen with high probability."}, ++ {.key = "cluster.watermark-low", ++ .voltype = "cluster/tier", ++ .value = "75", ++ .option = "watermark-low", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = ++ "Lower % watermark. If hot tier is less " ++ "full than this, promotion will happen and demotion will not happen. " ++ "If greater than this, promotion/demotion will happen at a " ++ "probability " ++ "relative to how full the hot tier is."}, ++ {.key = "cluster.tier-mode", ++ .voltype = "cluster/tier", ++ .option = "tier-mode", ++ .value = "cache", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = ++ "Either 'test' or 'cache'. Test mode periodically" ++ " demotes or promotes files automatically based on access." ++ " Cache mode does so based on whether the cache is full or not," ++ " as specified with watermarks."}, ++ {.key = "cluster.tier-max-promote-file-size", ++ .voltype = "cluster/tier", ++ .option = "tier-max-promote-file-size", ++ .value = "0", ++ .op_version = GD_OP_VERSION_3_7_10, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = ++ "The maximum file size in bytes that is promoted. 
If 0, there" ++ " is no maximum size (default)."}, ++ {.key = "cluster.tier-max-mb", ++ .voltype = "cluster/tier", ++ .option = "tier-max-mb", ++ .value = "4000", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = "The maximum number of MB that may be migrated" ++ " in any direction in a given cycle by a single node."}, ++ {.key = "cluster.tier-max-files", ++ .voltype = "cluster/tier", ++ .option = "tier-max-files", ++ .value = "10000", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = "The maximum number of files that may be migrated" ++ " in any direction in a given cycle by a single node."}, ++ {.key = "cluster.tier-query-limit", ++ .voltype = "cluster/tier", ++ .option = "tier-query-limit", ++ .value = "100", ++ .op_version = GD_OP_VERSION_3_9_1, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .type = NO_DOC, ++ .description = "The maximum number of files that may be migrated " ++ "during an emergency demote. An emergency condition " ++ "is flagged when writes breach the hi-watermark."}, ++ {.key = "cluster.tier-compact", ++ .voltype = "cluster/tier", ++ .option = "tier-compact", ++ .value = "on", ++ .op_version = GD_OP_VERSION_3_9_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = "Activate or deactivate the compaction of the DB" ++ " for the volume's metadata."}, ++ { ++ .key = "cluster.tier-hot-compact-frequency", ++ .voltype = "cluster/tier", ++ .value = "604800", ++ .option = "tier-hot-compact-frequency", ++ .op_version = GD_OP_VERSION_3_9_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ { ++ .key = "cluster.tier-cold-compact-frequency", ++ .voltype = "cluster/tier", ++ .value = "604800", ++ .option = "tier-cold-compact-frequency", ++ .op_version = GD_OP_VERSION_3_9_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ {.key = "features.ctr-enabled", ++ .voltype = "features/changetimerecorder", ++ .value = "off", ++ .option = "ctr-enabled", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .description = "Enable CTR xlator"}, ++ {.key = "features.record-counters", ++ .voltype = "features/changetimerecorder", ++ .value = "off", ++ .option = "record-counters", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .validate_fn = validate_tier_counters, ++ .description = "Its a Change Time Recorder Xlator option to " ++ "enable recording write " ++ "and read heat counters. The default is disabled. " ++ "If enabled, \"cluster.write-freq-threshold\" and " ++ "\"cluster.read-freq-threshold\" defined the number " ++ "of writes (or reads) to a given file are needed " ++ "before triggering migration."}, ++ {.key = "features.ctr-record-metadata-heat", ++ .voltype = "features/changetimerecorder", ++ .value = "off", ++ .option = "ctr-record-metadata-heat", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .type = NO_DOC, ++ .description = "Its a Change Time Recorder Xlator option to " ++ "enable recording write heat on metadata of the file. " ++ "The default is disabled. 
" ++ "Metadata is inode attributes like atime, mtime," ++ " permissions etc and " ++ "extended attributes of a file ."}, ++ {.key = "features.ctr_link_consistency", ++ .voltype = "features/changetimerecorder", ++ .value = "off", ++ .option = "ctr_link_consistency", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .type = NO_DOC, ++ .description = "Enable a crash consistent way of recording hardlink " ++ "updates by Change Time Recorder Xlator. " ++ "When recording in a crash " ++ "consistent way the data operations will " ++ "experience more latency."}, ++ {.key = "features.ctr_lookupheal_link_timeout", ++ .voltype = "features/changetimerecorder", ++ .value = "300", ++ .option = "ctr_lookupheal_link_timeout", ++ .op_version = GD_OP_VERSION_3_7_2, ++ .type = NO_DOC, ++ .description = "Defines the expiry period of in-memory " ++ "hardlink of an inode," ++ "used by lookup heal in Change Time Recorder." ++ "Once the expiry period" ++ "hits an attempt to heal the database per " ++ "hardlink is done and the " ++ "in-memory hardlink period is reset"}, ++ {.key = "features.ctr_lookupheal_inode_timeout", ++ .voltype = "features/changetimerecorder", ++ .value = "300", ++ .option = "ctr_lookupheal_inode_timeout", ++ .op_version = GD_OP_VERSION_3_7_2, ++ .type = NO_DOC, ++ .description = "Defines the expiry period of in-memory inode," ++ "used by lookup heal in Change Time Recorder. " ++ "Once the expiry period" ++ "hits an attempt to heal the database per " ++ "inode is done"}, ++ {.key = "features.ctr-sql-db-cachesize", ++ .voltype = "features/changetimerecorder", ++ .value = "12500", ++ .option = "sql-db-cachesize", ++ .validate_fn = validate_ctr_sql_params, ++ .op_version = GD_OP_VERSION_3_7_7, ++ .description = "Defines the cache size of the sqlite database of " ++ "changetimerecorder xlator." ++ "The input to this option is in pages." ++ "Each page is 4096 bytes. Default value is 12500 " ++ "pages." ++ "The max value is 262144 pages i.e 1 GB and " ++ "the min value is 1000 pages i.e ~ 4 MB. "}, ++ {.key = "features.ctr-sql-db-wal-autocheckpoint", ++ .voltype = "features/changetimerecorder", ++ .value = "25000", ++ .option = "sql-db-wal-autocheckpoint", ++ .validate_fn = validate_ctr_sql_params, ++ .op_version = GD_OP_VERSION_3_7_7, ++ .description = "Defines the autocheckpoint of the sqlite database of " ++ " changetimerecorder. " ++ "The input to this option is in pages. " ++ "Each page is 4096 bytes. Default value is 25000 " ++ "pages." ++ "The max value is 262144 pages i.e 1 GB and " ++ "the min value is 1000 pages i.e ~4 MB."}, ++ {.key = VKEY_FEATURES_SELINUX, ++ .voltype = "features/selinux", ++ .type = NO_DOC, ++ .value = "on", ++ .op_version = GD_OP_VERSION_3_11_0, ++ .description = "Convert security.selinux xattrs to " ++ "trusted.gluster.selinux on the bricks. 
Recommended " ++ "to have enabled when clients and/or bricks support " ++ "SELinux."}, ++ ++#endif /* USE_GFDB */ + { + .key = "locks.trace", + .voltype = "features/locks", +-- +1.8.3.1 + diff --git a/SOURCES/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch b/SOURCES/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch new file mode 100644 index 0000000..1f4a767 --- /dev/null +++ b/SOURCES/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch @@ -0,0 +1,89 @@ +From 144f2eb56d1bbecc9c455065755f41ec81974e3e Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Sun, 7 Apr 2019 21:54:07 +0530 +Subject: [PATCH 087/124] ganesha : fixing minor issues after the backport from + 3.4 + +label : DOWNSTREAM ONLY + +Change-Id: Ib0f6d8728d2e33da63ed4baab0bb981a0b06a8e0 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167168 +Tested-by: RHGS Build Bot +Reviewed-by: Kaleb Keithley +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 20 +++++++++++++++++--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + 2 files changed, 18 insertions(+), 4 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index e0607ba..f6b823d 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -883,6 +883,15 @@ sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sha + install -D -p -m 0644 extras/glusterfs-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs + ++# ganesha ghosts ++%if ( 0%{!?_without_server:1} ) ++mkdir -p %{buildroot}%{_sysconfdir}/ganesha ++touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf ++mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ ++touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf ++touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf ++%endif ++ + %if ( 0%{!?_without_georeplication:1} ) + mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication + touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf +@@ -1197,7 +1206,7 @@ exit 0 + + %if ( 0%{?_without_server:1} ) + #exclude ganesha related files +-%exclude %{_sysconfdir}/ganesha/* ++%exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample + %exclude %{_libexecdir}/ganesha/* + %exclude %{_prefix}/lib/ocf/resource.d/heartbeat/* + %endif +@@ -1376,9 +1385,15 @@ exit 0 + + %if ( 0%{!?_without_server:1} ) + %files ganesha +-%{_sysconfdir}/ganesha/* ++%dir %{_libexecdir}/ganesha ++%{_sysconfdir}/ganesha/ganesha-ha.conf.sample + %{_libexecdir}/ganesha/* + %{_prefix}/lib/ocf/resource.d/heartbeat/* ++%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh ++%ghost %attr(0644,-,-) %config(noreplace) %{_sysconfdir}/ganesha/ganesha-ha.conf ++%ghost %dir %attr(0755,-,-) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha ++%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf ++%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf + %endif + + %if ( 0%{!?_without_ocf:1} ) +@@ -1508,7 +1523,6 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh + 
%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index d882105..0a16925 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -278,7 +278,7 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+ goto out;
+ }
+
+- if (strcmp(value, "enable")) {
++ if (strcmp(value, "enable") == 0) {
+ ret = start_ganesha(op_errstr);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL,
+--
+1.8.3.1
+
diff --git a/SOURCES/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch b/SOURCES/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch
new file mode 100644
index 0000000..db831a2
--- /dev/null
+++ b/SOURCES/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch
@@ -0,0 +1,74 @@
+From bbcfd7e28b43845bac675dcc486bde09b0953f64 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham
+Date: Thu, 11 Apr 2019 14:40:11 +0530
+Subject: [PATCH 088/124] tier: fix failures noticed during tier start and tier
+ restart.
+
+Problem 1: When tier is started using the tier start command, the
+output was skipped on a failure. Failures do not carry a transaction
+id; this id was checked and, when missing, the output path was
+skipped.
+
+fix: remove the unnecessary jump for that case.
+
+Problem 2: When tier was restarted, tierd did not come online. This
+was because certain values that were supposed to be stored in
+glusterd (glusterd-store.c), to be used during restart to restore the
+original state, were never stored. As they were missing, tierd did
+not come online.
+
+fix: store the values and make them available during the start.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I7df898fa4c3b72fe8ded4adbf573307a59a37e5e
+Signed-off-by: Hari Gowtham
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167653
+Tested-by: RHGS Build Bot
+Reviewed-by: Sanju Rakonde
+Reviewed-by: Atin Mukherjee
+---
+ cli/src/cli-rpc-ops.c | 1 -
+ xlators/mgmt/glusterd/src/glusterd-store.c | 13 +++++++++++++
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 736cd18..b167e26 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -1973,7 +1973,6 @@ gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "failed to get %s from dict",
+ GF_REBALANCE_TID_KEY);
+- goto out;
+ }
+ if (rsp.op_ret && strcmp(rsp.op_errstr, "")) {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 351bd9e..4889217 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -3336,6 +3336,19 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
+ break;
+
+ case GF_CLUSTER_TYPE_TIER:
++ if (volinfo->tier_info.cold_type ==
++ GF_CLUSTER_TYPE_DISPERSE)
++ volinfo->tier_info.cold_dist_leaf_count
++ = volinfo->disperse_count;
++ else
++ volinfo->tier_info.cold_dist_leaf_count
++ = glusterd_calc_dist_leaf_count (
++ volinfo->tier_info. 
++ cold_replica_count,
++ 1);
++
++ break;
++
+ case GF_CLUSTER_TYPE_STRIPE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ gf_msg(this->name, GF_LOG_CRITICAL, ENOTSUP,
+--
+1.8.3.1
+
diff --git a/SOURCES/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch b/SOURCES/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch
new file mode 100644
index 0000000..6bdb7fc
--- /dev/null
+++ b/SOURCES/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch
@@ -0,0 +1,85 @@
+From 8ba7e04362019ea2d0e80e67eb214d53dca58774 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan
+Date: Thu, 17 Nov 2016 12:44:38 +0530
+Subject: [PATCH 089/124] glusterd/gNFS : On post upgrade to 3.2, disable gNFS
+ for all volumes
+
+Currently on 3.2, gNFS is disabled for newly created volumes and for
+old volumes that kept the default value. There will be volumes which
+have explicitly turned off the nfs.disable option. This change
+disables gNFS even for those volumes as well.
+
+label : DOWNSTREAM ONLY
+
+Change-Id: I4ddeb23690271034b0bbb3fc50b359350b5eae87
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/90425
+Reviewed-by: Atin Mukherjee
+Tested-by: Atin Mukherjee
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167573
+Tested-by: RHGS Build Bot
+Reviewed-by: Soumya Koduri
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 43 ++++++++++++++++++------------
+ 1 file changed, 26 insertions(+), 17 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 10e2d48..06ea8cf 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -2501,26 +2501,35 @@ glusterd_update_volumes_dict(glusterd_volinfo_t *volinfo)
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* 3.9.0 onwards gNFS will be disabled by default. In case of an upgrade
+- * from anything below than 3.9.0 to 3.9.x the volume's dictionary will
+- * not have 'nfs.disable' key set which means the same will not be set
+- * to on until explicitly done. setnfs.disable to 'on' at op-version
+- * bump up flow is the ideal way here. The same is also applicable for
+- * transport.address-family where if the transport type is set to tcp
+- * then transport.address-family is defaulted to 'inet'.
++ * from anything below than 3.9.0 to 3.9.x, the value for nfs.disable is
++ * set to 'on' for all volumes even if it is explicitly set to 'off' in
++ * previous version. This change is only applicable to downstream code.
++ * Setting nfs.disable to 'on' at op-version bump up flow is the ideal
++ * way here. The same is also applicable for transport.address-family
++ * where if the transport type is set to tcp then transport.address-family
++ * is defaulted to 'inet'.
+ */
+ if (conf->op_version >= GD_OP_VERSION_3_9_0) {
+- if (dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 1)) {
+- ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY,
+- "on");
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+- "Failed to set "
+- "option ' NFS_DISABLE_MAP_KEY ' on "
+- "volume %s",
+- volinfo->volname);
+- goto out;
+- }
++ if (!(dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 0))) {
++ gf_msg(this->name, GF_LOG_INFO, 0, 0,
++ "Gluster NFS is"
++ " being deprecated in favor of NFS-Ganesha, "
++ "hence setting nfs.disable to 'on' for volume "
++ "%s. 
Please re-enable it if requires", ++ volinfo->volname); ++ } ++ ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY, ++ "on"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set " ++ "option ' NFS_DISABLE_MAP_KEY ' on " ++ "volume %s", ++ volinfo->volname); ++ goto out; + } ++ + ret = dict_get_strn(volinfo->dict, "transport.address-family", + SLEN("transport.address-family"), + &address_family_str); +-- +1.8.3.1 + diff --git a/SOURCES/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch b/SOURCES/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch new file mode 100644 index 0000000..cbe1403 --- /dev/null +++ b/SOURCES/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch @@ -0,0 +1,307 @@ +From 19210e4fc551893d1545e719fa26d9ad2d2f5cba Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Mon, 13 Nov 2017 18:41:58 +0530 +Subject: [PATCH 090/124] Revert "build: conditionally build legacy gNFS server + and associated sub-packaging" + +This reverts commit 83abcba6b42f94eb5a6495a634d4055362a9d79d. + +label : DOWNSTREAM ONLY + +Change-Id: If1c02d80b746e0a5b5e2c9a3625909158eff55d5 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/167575 +Tested-by: RHGS Build Bot +Reviewed-by: Soumya Koduri +Reviewed-by: Kaleb Keithley +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 13 ------- + extras/LinuxRPM/Makefile.am | 4 +-- + glusterfs.spec.in | 54 ++++++---------------------- + xlators/Makefile.am | 5 +-- + xlators/mgmt/glusterd/src/glusterd-nfs-svc.c | 27 ++++++-------- + 5 files changed, 24 insertions(+), 79 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 633e850..521671b 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1359,18 +1359,6 @@ if test -n "$LIBAIO"; then + BUILD_LIBAIO=yes + fi + +-dnl gnfs section +-BUILD_GNFS="no" +-AC_ARG_ENABLE([gnfs], +- AC_HELP_STRING([--enable-gnfs], +- [Enable legacy gnfs server xlator.])) +-if test "x${with_server}" = "xyes" -a "x$enable_gnfs" = "xyes"; then +- BUILD_GNFS="yes" +-fi +-AM_CONDITIONAL([BUILD_GNFS], [test x$BUILD_GNFS = xyes]) +-AC_SUBST(BUILD_GNFS) +-dnl end gnfs section +- + dnl Check for userspace-rcu + PKG_CHECK_MODULES([URCU], [liburcu-bp], [], + [AC_CHECK_HEADERS([urcu-bp.h], +@@ -1624,7 +1612,6 @@ echo "EC dynamic support : $EC_DYNAMIC_SUPPORT" + echo "Use memory pools : $USE_MEMPOOL" + echo "Nanosecond m/atimes : $BUILD_NANOSECOND_TIMESTAMPS" + echo "Server components : $with_server" +-echo "Legacy gNFS server : $BUILD_GNFS" + echo "IPV6 default : $with_ipv6_default" + echo "Use TIRPC : $with_libtirpc" + echo "With Python : ${PYTHON_VERSION}" +diff --git a/extras/LinuxRPM/Makefile.am b/extras/LinuxRPM/Makefile.am +index f028537..61fd6da 100644 +--- a/extras/LinuxRPM/Makefile.am ++++ b/extras/LinuxRPM/Makefile.am +@@ -18,7 +18,7 @@ autogen: + cd ../.. && \ + rm -rf autom4te.cache && \ + ./autogen.sh && \ +- ./configure --enable-gnfs --with-previous-options ++ ./configure --with-previous-options + + prep: + $(MAKE) -C ../.. dist; +@@ -36,7 +36,7 @@ srcrpm: + mv rpmbuild/SRPMS/* . + + rpms: +- rpmbuild --define '_topdir $(shell pwd)/rpmbuild' --with gnfs -bb rpmbuild/SPECS/glusterfs.spec ++ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bb rpmbuild/SPECS/glusterfs.spec + mv rpmbuild/RPMS/*/* . 
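+ 
+ # (A sketch, assuming a configured checkout: the targets above are one
+ # way to build packages from the tree, e.g. something like
+ #   make -C extras/LinuxRPM srcrpm rpms
+ # after the tarball has been prepared, leaving the RPMs in this
+ # directory.)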
+ + # EPEL-5 does not like new versions of rpmbuild and requires some +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index f6b823d..cb17eaa 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -52,11 +52,6 @@ + # rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without georeplication + %{?_without_georeplication:%global _without_georeplication --disable-georeplication} + +-# gnfs +-# if you wish to compile an rpm with the legacy gNFS server xlator +-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with gnfs +-%{?_with_gnfs:%global _with_gnfs --enable-gnfs} +- + # ipv6default + # if you wish to compile an rpm with IPv6 default... + # rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with ipv6default +@@ -153,7 +148,6 @@ + %if 0%{?_without_server:1} + %global _without_events --disable-events + %global _without_georeplication --disable-georeplication +-%global _with_gnfs %{nil} + %global _without_tiering --disable-tiering + %global _without_ocf --without-ocf + %endif +@@ -525,25 +519,6 @@ is in userspace and easily manageable. + This package provides support to geo-replication. + %endif + +-%if ( 0%{?_with_gnfs:1} ) +-%package gnfs +-Summary: GlusterFS gNFS server +-Requires: %{name}%{?_isa} = %{version}-%{release} +-Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} +-Requires: nfs-utils +- +-%description gnfs +-GlusterFS is a distributed file-system capable of scaling to several +-petabytes. It aggregates various storage bricks over Infiniband RDMA +-or TCP/IP interconnect into one large parallel network file +-system. GlusterFS is one of the most sophisticated file systems in +-terms of features and extensibility. It borrows a powerful concept +-called Translators from GNU Hurd kernel. Much of the code in GlusterFS +-is in user space and easily manageable. 
+- +-This package provides the glusterfs legacy gNFS server xlator +-%endif +- + %package libs + Summary: GlusterFS common libraries + +@@ -659,6 +634,7 @@ Requires: %{name}-api%{?_isa} = %{version}-%{release} + Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} + # lvm2 for snapshot, and nfs-utils and rpcbind/portmap for gnfs server + Requires: lvm2 ++Requires: nfs-utils + %if ( 0%{?_with_systemd:1} ) + %{?systemd_requires} + %else +@@ -789,7 +765,6 @@ export LDFLAGS + %{?_with_cmocka} \ + %{?_with_debug} \ + %{?_with_firewalld} \ +- %{?_with_gnfs} \ + %{?_with_tmpfilesdir} \ + %{?_with_tsan} \ + %{?_with_valgrind} \ +@@ -1286,17 +1261,6 @@ exit 0 + %{_bindir}/fusermount-glusterfs + %endif + +-%if ( 0%{?_with_gnfs:1} && 0%{!?_without_server:1} ) +-%files gnfs +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs +- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/server.so +-%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs +-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol +-%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run +-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid +-%endif +- + %files thin-arbiter + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features +@@ -1409,11 +1373,6 @@ exit 0 + %config(noreplace) %{_sysconfdir}/glusterfs + %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol + %exclude %{_sysconfdir}/glusterfs/eventsconfig.json +-%exclude %{_sharedstatedir}/glusterd/nfs/nfs-server.vol +-%exclude %{_sharedstatedir}/glusterd/nfs/run/nfs.pid +-%if ( 0%{?_with_gnfs:1} ) +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/* +-%endif + %config(noreplace) %{_sysconfdir}/sysconfig/glusterd + %if ( 0%{_for_fedora_koji_builds} ) + %config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd +@@ -1461,6 +1420,7 @@ exit 0 + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so ++ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs* + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol +@@ -1477,6 +1437,7 @@ exit 0 + + # /var/lib/glusterd, e.g. hookscripts, etc. 
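+ # (%ghost entries below are owned by the package but not shipped in its
+ # payload; they are expected to be created at runtime, e.g. by glusterd
+ # itself.)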
+ %ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info ++%ghost %attr(0600,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/options + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/bitd + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups +@@ -1529,7 +1490,11 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S30samba-stop.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S29CTDB-teardown.sh +-%config(noreplace) %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options ++%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs ++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol ++%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run ++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid ++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub +@@ -1995,6 +1960,9 @@ fi + %endif + + %changelog ++* Sun Apr 7 2019 Jiffin Tony Thottan ++- DOWNSTREAM ONLY - revert of 83abcb(gnfs in an optional subpackage) ++ + * Sun Apr 7 2019 Soumya Koduri + - As an interim fix add dependency on netstat(/net-tools) for glusterfs-ganesha package (#1395574) + +diff --git a/xlators/Makefile.am b/xlators/Makefile.am +index ef20cbb..be54eb3 100644 +--- a/xlators/Makefile.am ++++ b/xlators/Makefile.am +@@ -1,12 +1,9 @@ +-if BUILD_GNFS +- GNFS_DIR = nfs +-endif + + DIST_SUBDIRS = cluster storage protocol performance debug features \ + mount nfs mgmt system playground meta + + SUBDIRS = cluster storage protocol performance debug features \ +- mount ${GNFS_DIR} mgmt system playground meta ++ mount nfs mgmt system playground meta + + EXTRA_DIST = xlator.sym + +diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c +index 36e9052..3960031 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c +@@ -18,6 +18,8 @@ + #include "glusterd-messages.h" + #include "glusterd-svc-helper.h" + ++static char *nfs_svc_name = "nfs"; ++ + static gf_boolean_t + glusterd_nfssvc_need_start() + { +@@ -41,6 +43,12 @@ glusterd_nfssvc_need_start() + return start; + } + ++int ++glusterd_nfssvc_init(glusterd_svc_t *svc) ++{ ++ return glusterd_svc_init(svc, nfs_svc_name); ++} ++ + static int + glusterd_nfssvc_create_volfile() + { +@@ -49,7 +57,7 @@ glusterd_nfssvc_create_volfile() + }; + glusterd_conf_t *conf = THIS->private; + +- glusterd_svc_build_volfile_path(conf->nfs_svc.name, conf->workdir, filepath, ++ glusterd_svc_build_volfile_path(nfs_svc_name, conf->workdir, filepath, + sizeof(filepath)); + return glusterd_create_global_volfile(build_nfs_graph, filepath, NULL); + } +@@ -60,7 +68,7 @@ glusterd_nfssvc_manager(glusterd_svc_t *svc, void *data, int flags) + int ret = -1; + + if (!svc->inited) { +- ret = glusterd_svc_init(svc, "nfs"); ++ ret = glusterd_nfssvc_init(svc); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_NFSSVC, + "Failed to init nfs service"); +@@ -75,13 +83,6 @@ glusterd_nfssvc_manager(glusterd_svc_t *svc, void *data, int flags) + if (ret) + goto out; + +- /* not an error, or a (very) soft error at best */ +- if (sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) 
{
+- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED,
+- "nfs/server.so xlator is not installed");
+- ret = 0;
+- goto out;
+- }
+-
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+--
+1.8.3.1
+
diff --git a/SOURCES/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch b/SOURCES/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch
new file mode 100644
index 0000000..292b3f5
--- /dev/null
+++ b/SOURCES/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch
@@ -0,0 +1,110 @@
+From ca3a4ebeddfef8c6909ff5388787a91ee52fd675 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan
+Date: Thu, 15 Dec 2016 17:14:01 +0530
+Subject: [PATCH 091/124] glusterd/gNFS : explicitly set "nfs.disable" to "off"
+ after 3.2 upgrade
+
+Gluster NFS was enabled by default for all volumes up to 3.1, but from
+3.2 onwards it is disabled for new volumes by setting "nfs.disable" to
+"on". This patch takes care of existing volumes in such a way that, if
+the option is not configured, "nfs.disable" is set to "off" during the
+op-version bump-up.
+
+Also this patch removes the warning message shown while enabling
+Gluster NFS for a volume.
+
+label : DOWNSTREAM ONLY
+
+Change-Id: Ib199c3180204f917791b4627c58d846750d18a5a
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/93146
+Reviewed-by: Soumya Koduri
+Reviewed-by: Atin Mukherjee
+Signed-off-by: Jiffin Tony Thottan
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167574
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ cli/src/cli-cmd-parser.c | 16 ---------------
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 31 ++++++++++++------------------
+ 2 files changed, 12 insertions(+), 35 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index f85958b..92ceb8e 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -1678,22 +1678,6 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ goto out;
+ }
+ }
+- if ((!strcmp(key, "nfs.disable")) && (!strcmp(value, "off"))) {
+- question =
+- "Gluster NFS is being deprecated in favor "
+- "of NFS-Ganesha Enter \"yes\" to continue "
+- "using Gluster NFS";
+- answer = cli_cmd_get_confirmation(state, question);
+- if (GF_ANSWER_NO == answer) {
+- gf_log("cli", GF_LOG_ERROR,
+- "Operation "
+- "cancelled, exiting");
+- *op_errstr = gf_strdup("Aborted by user.");
+- ret = -1;
+- goto out;
+- }
+- }
+-
+ if ((strcmp(key, "cluster.brick-multiplex") == 0)) {
+ question =
+ "Brick-multiplexing is supported only for "
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 06ea8cf..df8a6ab 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -2502,32 +2502,25 @@ glusterd_update_volumes_dict(glusterd_volinfo_t *volinfo)
+
+ /* 3.9.0 onwards gNFS will be disabled by default. 
In case of an upgrade + * from anything below than 3.9.0 to 3.9.x, the value for nfs.disable is +- * set to 'on' for all volumes even if it is explicitly set to 'off' in ++ * set to 'off' for all volumes even if it is explicitly set to 'on' in + * previous version. This change is only applicable to downstream code. +- * Setting nfs.disable to 'on' at op-version bump up flow is the ideal ++ * Setting nfs.disable to 'off' at op-version bump up flow is the ideal + * way here. The same is also applicable for transport.address-family + * where if the transport type is set to tcp then transport.address-family + * is defaulted to 'inet'. + */ + if (conf->op_version >= GD_OP_VERSION_3_9_0) { + if (!(dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 0))) { +- gf_msg(this->name, GF_LOG_INFO, 0, 0, +- "Gluster NFS is" +- " being deprecated in favor of NFS-Ganesha, " +- "hence setting nfs.disable to 'on' for volume " +- "%s. Please re-enable it if requires", +- volinfo->volname); +- } +- +- ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY, +- "on"); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, +- "Failed to set " +- "option ' NFS_DISABLE_MAP_KEY ' on " +- "volume %s", +- volinfo->volname); +- goto out; ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY, ++ "off"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set " ++ "option ' NFS_DISABLE_MAP_KEY ' off " ++ "volume %s", ++ volinfo->volname); ++ goto out; ++ } + } + + ret = dict_get_strn(volinfo->dict, "transport.address-family", +-- +1.8.3.1 + diff --git a/SOURCES/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch b/SOURCES/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch new file mode 100644 index 0000000..f4a39aa --- /dev/null +++ b/SOURCES/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch @@ -0,0 +1,41 @@ +From 82d7c8e057b9e22d13ca89f2a75e65a42878b7c3 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 2 Apr 2019 10:45:15 +0530 +Subject: [PATCH 092/124] logging: Fix GF_LOG_OCCASSIONALLY API + +GF_LOG_OCCASSIONALLY doesn't log on the first instance rather at every +42nd iterations which isn't effective as in some cases we might not have +the code flow hitting the same log for as many as 42 times and we'd end +up suppressing the log. + +>upstream fix : https://review.gluster.org/#/c/glusterfs/+/22475/ +>Fixes: bz#1694925 +>Change-Id: Iee293281d25a652b64df111d59b13de4efce06fa +>Signed-off-by: Atin Mukherjee + +BUG: 1691620 +Change-Id: Iee293281d25a652b64df111d59b13de4efce06fa +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/167822 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/logging.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/logging.h b/libglusterfs/src/glusterfs/logging.h +index c81e19b..3655b1d 100644 +--- a/libglusterfs/src/glusterfs/logging.h ++++ b/libglusterfs/src/glusterfs/logging.h +@@ -300,7 +300,7 @@ _gf_log_eh(const char *function, const char *fmt, ...) + + /* Log once in GF_UNIVERSAL_ANSWER times */ + #define GF_LOG_OCCASIONALLY(var, args...) 
\ +- if (!(var++ % GF_UNIVERSAL_ANSWER)) { \ ++ if (var++ == 0 || !((var - 1) % GF_UNIVERSAL_ANSWER)) { \ + gf_log(args); \ + } + +-- +1.8.3.1 + diff --git a/SOURCES/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch b/SOURCES/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch new file mode 100644 index 0000000..7c3782c --- /dev/null +++ b/SOURCES/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch @@ -0,0 +1,106 @@ +From 713f55b4a5cc582d06a10a1c9a0cdf71a4636a10 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Mon, 15 Apr 2019 07:57:40 +0530 +Subject: [PATCH 093/124] glusterd: Change op-version of cache-invalidation in + quick-read + +In upstream cache-invalidation option in quick read was introduced with +4.0. There are two problems with it: + +1. The key being made duplicate to md-cache xlator, so setting the same +option actually enables this feature on both md-cache and quick-read. +2. As the op-version tagged to this key was GD_OP_VERSION_4_0_0, with a RHGS +3.5 cluster when a md-cache profile is set to a particular volume old +client fails to mount which is wrong. + +Solving 1 with out breaking backward compatibility in upstream is quite hard. +This patch addresses both the problems in downstream by (a) changing the +op-version to GD_OP_VERSION_6_0_0 and (b) renaming this key to +quick-read-cache-invalidation. The fix is currently made downstream only till a +proper solution is identified in upstream. + +Label: DOWNSTREAM ONLY +BUG: 1697820 +Change-Id: I1251424576d6ebbdb2a761400fd20f0aff0c80a2 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/167823 +Reviewed-by: Amar Tumballi Suryanarayan +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/quick-read-with-upcall.t | 1 + + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 +++--- + xlators/performance/quick-read/src/quick-read.c | 11 ++++++----- + 3 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/tests/basic/quick-read-with-upcall.t b/tests/basic/quick-read-with-upcall.t +index 318e93a..647dacf 100644 +--- a/tests/basic/quick-read-with-upcall.t ++++ b/tests/basic/quick-read-with-upcall.t +@@ -58,6 +58,7 @@ EXPECT "$D0" cat $M1/test1.txt + sleep 60 + EXPECT "$D1" cat $M1/test1.txt + ++TEST $CLI volume set $V0 performance.quick-read-cache-invalidation on + TEST $CLI volume set $V0 performance.cache-invalidation on + + TEST write_to "$M0/test2.txt" "$D0" +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index a877805..42ca9bb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1690,10 +1690,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .option = "cache-timeout", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, +- {.key = "performance.cache-invalidation", ++ {.key = "performance.quick-read-cache-invalidation", + .voltype = "performance/quick-read", +- .option = "cache-invalidation", +- .op_version = GD_OP_VERSION_4_0_0, ++ .option = "quick-read-cache-invalidation", ++ .op_version = GD_OP_VERSION_6_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.ctime-invalidation", + .voltype = "performance/quick-read", +diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c +index 244e8c8..59553c0 100644 +--- a/xlators/performance/quick-read/src/quick-read.c ++++ 
b/xlators/performance/quick-read/src/quick-read.c
+@@ -1218,8 +1218,8 @@ qr_reconfigure(xlator_t *this, dict_t *options)
+
+ GF_OPTION_RECONF("cache-timeout", conf->cache_timeout, options, int32, out);
+
+- GF_OPTION_RECONF("cache-invalidation", conf->qr_invalidation, options, bool,
+- out);
++ GF_OPTION_RECONF("quick-read-cache-invalidation", conf->qr_invalidation,
++ options, bool, out);
+
+ GF_OPTION_RECONF("ctime-invalidation", conf->ctime_invalidation, options,
+ bool, out);
+@@ -1369,7 +1369,8 @@ qr_init(xlator_t *this)
+
+ GF_OPTION_INIT("cache-timeout", conf->cache_timeout, int32, out);
+
+- GF_OPTION_INIT("cache-invalidation", conf->qr_invalidation, bool, out);
++ GF_OPTION_INIT("quick-read-cache-invalidation", conf->qr_invalidation, bool,
++ out);
+
+ GF_OPTION_INIT("cache-size", conf->cache_size, size_uint64, out);
+ if (!check_cache_size_ok(this, conf->cache_size)) {
+@@ -1615,10 +1616,10 @@ struct volume_options qr_options[] = {
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+ {
+- .key = {"cache-invalidation"},
++ .key = {"quick-read-cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+- .op_version = {GD_OP_VERSION_4_0_0},
++ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "When \"on\", invalidates/updates the metadata cache,"
+ " on receiving the cache-invalidation notifications",
+--
+1.8.3.1
+
diff --git a/SOURCES/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch b/SOURCES/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch
new file mode 100644
index 0000000..727f8b5
--- /dev/null
+++ b/SOURCES/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch
@@ -0,0 +1,45 @@
+From dab37dc78d21762ac3379ad505f8fc4ec996d0f7 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee
+Date: Tue, 9 Apr 2019 14:58:29 +0530
+Subject: [PATCH 094/124] glusterd: load ctime in the client graph only if it's
+ not turned off
+
+Considering ctime is a client-side feature, we can't blindly load the ctime
+xlator into the client graph if it's explicitly turned off; that'd
+result in a backward compatibility issue where an old client can't mount
+a volume configured on a server which has the ctime feature. 
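
To make that failure mode concrete: on mount, the client builds its graph from
the fetched volfile, and every translator type named there has to be resolved
by dlopen()ing a shared object from the client's installed xlator directory.
The standalone sketch below models just that step; the .so path and the
version directory in it are illustrative, not taken from this patch.

    #include <dlfcn.h>
    #include <stdio.h>

    int main(void)
    {
        /* An old client does not ship a features/utime shared object, so
         * resolving the type named in the volfile fails and the mount is
         * aborted.  The path below is a hypothetical example. */
        const char *so = "/usr/lib64/glusterfs/3.12.2/xlator/features/utime.so";
        void *handle = dlopen(so, RTLD_NOW);

        if (!handle) {
            /* this is the point where graph construction would give up */
            fprintf(stderr, "graph init failed: %s\n", dlerror());
            return 1;
        }
        dlclose(handle);
        return 0;
    }

In the real client the equivalent resolution happens inside xlator_set_type(),
so the mount fails before any graph is activated.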
+
+> Upstream patch : https://review.gluster.org/#/c/glusterfs/+/22536/
+>Fixes: bz#1697907
+>Change-Id: I6ae7b96d056073aa6746de9a449cf319786d45cc
+>Signed-off-by: Atin Mukherjee
+
+BUG: 1697820
+Change-Id: I6ae7b96d056073aa6746de9a449cf319786d45cc
+Signed-off-by: Atin Mukherjee
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167815
+Tested-by: RHGS Build Bot
+Reviewed-by: Mohit Agrawal
+Reviewed-by: Amar Tumballi Suryanarayan
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index ed24858..012f38e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4358,7 +4358,8 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ }
+ }
+
+- if (conf->op_version >= GD_OP_VERSION_5_0) {
++ if (conf->op_version >= GD_OP_VERSION_5_0 &&
++ !dict_get_str_boolean(set_dict, "features.ctime", _gf_false)) {
+ xl = volgen_graph_add(graph, "features/utime", volname);
+ if (!xl) {
+ ret = -1;
+--
+1.8.3.1
+
diff --git a/SOURCES/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch b/SOURCES/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch
new file mode 100644
index 0000000..0e17c44
--- /dev/null
+++ b/SOURCES/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch
@@ -0,0 +1,204 @@
+From cca418b78ec976aa69eacd56b0e6127ea7e3dd26 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K
+Date: Thu, 4 Apr 2019 15:31:56 +0530
+Subject: [PATCH 095/124] cluster/afr: Remove local from owners_list on failure
+ of lock-acquisition
+
+ Backport of https://review.gluster.org/c/glusterfs/+/22515
+
+When eager-lock lock acquisition fails because of, say, network failures, the
+local is not removed from owners_list. This leads to an accumulation of
+waiting frames, and the application will hang, because the waiting frames
+assume that another transaction is in the process of acquiring the
+lock since owner-list is not empty. Handled this case as well in this patch.
+Added asserts to make it easier to find these problems in future.
+
+Change-Id: I3101393265e9827755725b1f2d94a93d8709e923
+fixes: bz#1688395
+Signed-off-by: Pranith Kumar K
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167859
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ tests/bugs/replicate/bug-1696599-io-hang.t | 47 ++++++++++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-common.c | 8 ++---
+ xlators/cluster/afr/src/afr-lk-common.c | 1 -
+ xlators/cluster/afr/src/afr-transaction.c | 19 +++++-------
+ xlators/cluster/afr/src/afr.h | 4 +--
+ 5 files changed, 61 insertions(+), 18 deletions(-)
+ create mode 100755 tests/bugs/replicate/bug-1696599-io-hang.t
+
+diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t
+new file mode 100755
+index 0000000..869cdb9
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1696599-io-hang.t
+@@ -0,0 +1,47 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. 
$(dirname $0)/../../fileio.rc ++ ++#Tests that local structures in afr are removed from granted/blocked list of ++#locks when inodelk fails on all bricks ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3} ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.client-io-threads off ++TEST $CLI volume set $V0 delay-gen locks ++TEST $CLI volume set $V0 delay-gen.delay-duration 5000000 ++TEST $CLI volume set $V0 delay-gen.delay-percentage 100 ++TEST $CLI volume set $V0 delay-gen.enable finodelk ++ ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++TEST $GFS -s $H0 --volfile-id $V0 $M0 ++TEST touch $M0/file ++#Trigger write and stop bricks so inodelks fail on all bricks leading to ++#lock failure condition ++echo abc >> $M0/file & ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume reset $V0 delay-gen ++wait ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++#Test that only one write succeeded, this tests that delay-gen worked as ++#expected ++echo abc >> $M0/file ++EXPECT "abc" cat $M0/file ++ ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 45b96e3..47a5d3a 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5763,6 +5763,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this) + afr_private_t *priv = NULL; + + priv = this->private; ++ INIT_LIST_HEAD(&local->transaction.wait_list); ++ INIT_LIST_HEAD(&local->transaction.owner_list); ++ INIT_LIST_HEAD(&local->ta_waitq); ++ INIT_LIST_HEAD(&local->ta_onwireq); + ret = afr_internal_lock_init(&local->internal_lock, priv->child_count); + if (ret < 0) + goto out; +@@ -5800,10 +5804,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this) + goto out; + + ret = 0; +- INIT_LIST_HEAD(&local->transaction.wait_list); +- INIT_LIST_HEAD(&local->transaction.owner_list); +- INIT_LIST_HEAD(&local->ta_waitq); +- INIT_LIST_HEAD(&local->ta_onwireq); + out: + return ret; + } +diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c +index 4091671..bc8eabe 100644 +--- a/xlators/cluster/afr/src/afr-lk-common.c ++++ b/xlators/cluster/afr/src/afr-lk-common.c +@@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this) + int_lock->lk_call_count = call_count; + + if (!call_count) { +- GF_ASSERT(!local->transaction.do_eager_unlock); + gf_msg_trace(this->name, 0, "No internal locks unlocked"); + int_lock->lock_cbk(frame, this); + goto out; +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 229820b..15f3a7e 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this) + } + local->transaction.unwind(frame, this); + ++ GF_ASSERT(list_empty(&local->transaction.owner_list)); ++ GF_ASSERT(list_empty(&local->transaction.wait_list)); + AFR_STACK_DESTROY(frame); + + return 0; +@@ -393,7 +395,7 @@ 
afr_lock_fail_shared(afr_local_t *local, struct list_head *list) + } + + static void +-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) ++afr_handle_lock_acquire_failure(afr_local_t *local) + { + struct list_head shared; + afr_lock_t *lock = NULL; +@@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) + afr_lock_fail_shared(local, &shared); + local->transaction.do_eager_unlock = _gf_true; + out: +- if (locked) { +- local->internal_lock.lock_cbk = afr_transaction_done; +- afr_unlock(local->transaction.frame, local->transaction.frame->this); +- } else { +- afr_transaction_done(local->transaction.frame, +- local->transaction.frame->this); +- } ++ local->internal_lock.lock_cbk = afr_transaction_done; ++ afr_unlock(local->transaction.frame, local->transaction.frame->this); + } + + call_frame_t * +@@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this) + failure_count = AFR_COUNT(local->transaction.failed_subvols, + priv->child_count); + if (failure_count == priv->child_count) { +- afr_handle_lock_acquire_failure(local, _gf_true); ++ afr_handle_lock_acquire_failure(local); + return 0; + } else { + lock = &local->inode_ctx->lock[local->transaction.type]; +@@ -2092,7 +2089,7 @@ err: + local->op_ret = -1; + local->op_errno = op_errno; + +- afr_handle_lock_acquire_failure(local, _gf_true); ++ afr_handle_lock_acquire_failure(local); + + if (xdata_req) + dict_unref(xdata_req); +@@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this) + } else { + lock = &local->inode_ctx->lock[local->transaction.type]; + if (local->internal_lock.lock_op_ret < 0) { +- afr_handle_lock_acquire_failure(local, _gf_false); ++ afr_handle_lock_acquire_failure(local); + } else { + lock->event_generation = local->event_generation; + afr_changelog_pre_op(frame, this); +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 2cc3797..e731cfa 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -1091,8 +1091,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd); + #define AFR_FRAME_INIT(frame, op_errno) \ + ({ \ + frame->local = mem_get0(THIS->local_pool); \ +- if (afr_local_init(frame->local, THIS->private, &op_errno)) { \ +- afr_local_cleanup(frame->local, THIS); \ ++ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \ ++ afr_local_cleanup(frame->local, frame->this); \ + mem_put(frame->local); \ + frame->local = NULL; \ + }; \ +-- +1.8.3.1 + diff --git a/SOURCES/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch b/SOURCES/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch new file mode 100644 index 0000000..cc2a448 --- /dev/null +++ b/SOURCES/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch @@ -0,0 +1,94 @@ +From 3bcffadcd77eebe6b4f7e5015ad41ec7c1d1ec3e Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 11 Apr 2019 20:38:53 +0530 +Subject: [PATCH 096/124] core: Brick is not able to detach successfully in + brick_mux environment + +Problem: In brick_mux environment, while volumes are stopped in a + loop bricks are not detached successfully. Brick's are not + detached because xprtrefcnt has not become 0 for detached brick. 
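
The remainder of this description traces the root cause to a changelog file
descriptor that defaults to 0. As a standalone illustration of that pitfall
(the struct below is a stand-in for the real changelog_priv, not its actual
definition):

    #include <stdlib.h>
    #include <unistd.h>

    struct priv_model { int htime_fd; };  /* models changelog_priv */

    int main(void)
    {
        /* calloc() zero-fills the struct, so a descriptor field that was
         * never opened already holds 0 -- a perfectly valid fd. */
        struct priv_model *priv = calloc(1, sizeof(*priv));
        if (!priv)
            return 1;

        /* the fini-time guard: "close htime_fd if it is not negative" */
        if (priv->htime_fd >= 0)   /* 0 passes this check ... */
            close(priv->htime_fd); /* ... so fd 0 is closed by mistake */

        free(priv);
        return 0;
    }

Initializing htime_fd to -1 right after allocation, which is all the one-line
fix below does, keeps that guard from ever matching a descriptor that was
never opened.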
+ At the time of initiating the brick detach process, server_notify
+ saves xprtrefcnt on the detached brick, and once the counter has become
+ 0, server_rpc_notify spawns a server_graph_janitor_threads
+ for cleaning up brick resources. xprtrefcnt has not become 0 because
+ the socket framework is not working, due to 0 being assigned as a socket fd.
+ In commit dc25d2c1eeace91669052e3cecc083896e7329b2
+ there was a change in changelog fini to close htime_fd if htime_fd is not
+ negative; by default htime_fd is 0, so it closed fd 0 as well.
+
+Solution: Initialize htime_fd to -1 just after allocating changelog_priv
+ by GF_CALLOC
+
+> Fixes: bz#1699025
+> Change-Id: I5f7ca62a0eb1c0510c3e9b880d6ab8af8d736a25
+> Signed-off-by: Mohit Agrawal
+> (Cherry picked from commit b777d83001d8006420b6c7d2d88fe68950aa7e00)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22549/
+
+Fixes: bz#1698919
+Change-Id: Ib5b74aa0818235625f8aac7c23d4daa599da3fd1
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167826
+Tested-by: Mohit Agrawal
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ .../bug-1699025-brick-mux-detach-brick-fd-issue.t | 33 ++++++++++++++++++++++
+ xlators/features/changelog/src/changelog.c | 1 +
+ 2 files changed, 34 insertions(+)
+ create mode 100644 tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+
+diff --git a/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+new file mode 100644
+index 0000000..1acbaa8
+--- /dev/null
++++ b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+@@ -0,0 +1,33 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../cluster.rc
++
++function count_brick_processes {
++ pgrep glusterfsd | wc -l
++}
++
++cleanup
++
++#bug-1444596 - validating brick mux
++
++TEST glusterd
++TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
++TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
++
++TEST $CLI volume set all cluster.brick-multiplex on
++
++TEST $CLI volume start $V0
++TEST $CLI volume start $V1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
++EXPECT 1 count_brick_processes
++
++TEST $CLI volume stop $V1
++# At the time initialize brick daemon it always keeps open
++# standard fd's (0, 1 , 2) so after stop 1 volume fd's should
++# be open
++nofds=$(ls -lrth /proc/`pgrep glusterfsd`/fd | grep dev/null | wc -l)
++TEST [ $((nofds)) -eq 3 ]
++
++cleanup
+diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
+index 1f22a97..d9025f3 100644
+--- a/xlators/features/changelog/src/changelog.c
++++ b/xlators/features/changelog/src/changelog.c
+@@ -2740,6 +2740,7 @@ init(xlator_t *this)
+ GF_ATOMIC_INIT(priv->clntcnt, 0);
+ GF_ATOMIC_INIT(priv->xprtcnt, 0);
+ INIT_LIST_HEAD(&priv->xprt_list);
++ priv->htime_fd = -1;
+
+ ret = changelog_init_options(this, priv);
+ if (ret)
+--
+1.8.3.1
+
diff --git a/SOURCES/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch b/SOURCES/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch
new file mode 100644
index 0000000..42f1e47
--- /dev/null
+++ b/SOURCES/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch
@@ -0,0 +1,61 @@
+From 302f3f87c9aa00c17ec3b49a81c8a4441d2bdf5f Mon Sep 17 00:00:00 2001
+From: Hari Gowtham
+Date: Mon, 15 Apr 2019 10:01:40 +0530
+Subject: [PATCH 097/124] glusterd/tier: while doing an attach tier, the self
+ heal daemon is 
not getting started
+
+Problem: on a replicated volume, if attach tier is done,
+the shd will be restarted. But here the restart fails because the
+graph is not getting generated properly. The dict which is used for graph
+creation doesn't get the values copied properly in prepare_shd_volume_options();
+glusterd_prepare_shd_volume_options_for_tier() fails and skips the copy.
+
+This patch reverts the changes back to the way it was in 3.4 and
+helps in fixing the issue. Using the old dict_set_str works.
+
+label: DOWNSTREAM ONLY
+
+Change-Id: I21534ca177511e018ba76886e899b3b1a4ac4716
+Signed-off-by: Hari Gowtham
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167825
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 012f38e..1f53beb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4895,14 +4895,21 @@ glusterd_prepare_shd_volume_options_for_tier(glusterd_volinfo_t *volinfo,
+ dict_t *set_dict)
+ {
+ int ret = -1;
++ char *key = NULL;
+
+- ret = volgen_set_shd_key_enable(set_dict, volinfo->tier_info.cold_type);
+- if (ret)
+- goto out;
++ key = volgen_get_shd_key (volinfo->tier_info.cold_type);
++ if (key) {
++ ret = dict_set_str (set_dict, key, "enable");
++ if (ret)
++ goto out;
++ }
+
+- ret = volgen_set_shd_key_enable(set_dict, volinfo->tier_info.hot_type);
+- if (ret)
+- goto out;
++ key = volgen_get_shd_key (volinfo->tier_info.hot_type);
++ if (key) {
++ ret = dict_set_str (set_dict, key, "enable");
++ if (ret)
++ goto out;
++ }
+ out:
+ return ret;
+ }
+--
+1.8.3.1
+
diff --git a/SOURCES/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch b/SOURCES/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch
new file mode 100644
index 0000000..64d198d
--- /dev/null
+++ b/SOURCES/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch
@@ -0,0 +1,4617 @@
+From 2cede2b87fb3e3e0673be9cf67e7d6eec3f7879c Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC
+Date: Mon, 25 Feb 2019 10:05:32 +0530
+Subject: [PATCH 098/124] mgmt/shd: Implement multiplexing in self heal daemon
+
+Problem:
+
+The shd daemon is per node, which means it creates a graph
+with all volumes on it. While this is great for utilizing
+resources, it is not so good in terms of performance and manageability.
+
+This is because self-heal daemons don't have the capability to automatically
+reconfigure their graphs. So each time any configuration
+change happens to the volumes (replicate/disperse), we need to restart
+shd to bring the changes into the graph.
+
+Because of this, all ongoing heals for all other volumes have to be
+stopped in the middle and restarted all over again.
+
+Solution:
+
+This change makes shd a per-volume daemon, so that the graph
+will be generated for each volume.
+
+When we want to start/reconfigure shd for a volume, we first search
+for an existing shd running on the node; if there is none, we
+start a new process. If a daemon is already running for shd, then
+we simply detach the volume's graph and reattach the updated
+graph for the volume. This won't touch any of the ongoing operations
+for any other volumes on the shd daemon. 
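
A rough sketch of that attach-or-spawn decision follows; all helper names
here are assumed for illustration, while the real logic lives in
glusterd-shd-svc.c and glusterd-svc-helper.c in the diff below:

    /* pseudo-C with assumed names -- not the actual implementation */
    int shd_manage_volume(glusterd_volinfo_t *volinfo)
    {
        glusterd_svc_proc_t *proc = find_shd_process_on_node(); /* assumed */

        if (!proc)
            /* no shd on this node yet: start a fresh glustershd process */
            return spawn_shd_process(volinfo);                  /* assumed */

        /* a daemon already runs: swap only this volume's graph over RPC,
         * leaving heals of every other attached volume untouched */
        detach_volume_graph(proc, volinfo);        /* GLUSTERD_SVC_DETACH */
        return attach_volume_graph(proc, volinfo); /* GLUSTERD_SVC_ATTACH */
    }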
+ +Example of an shd graph when it is per volume + + graph + ----------------------- + | debug-iostat | + ----------------------- + / | \ + / | \ + --------- --------- ---------- + | AFR-1 | | AFR-2 | | AFR-3 | + -------- --------- ---------- + +A running shd daemon with 3 volumes will be like--> + + graph + ----------------------- + | debug-iostat | + ----------------------- + / | \ + / | \ + ------------ ------------ ------------ + | volume-1 | | volume-2 | | volume-3 | + ------------ ------------ ------------ + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22075 + +>Change-Id: Idcb2698be3eeb95beaac47125565c93370afbd99 +>fixes: bz#1659708 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: I38b6e2bb62edd818e460ccf6e9740364dc676876 +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/167830 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + glusterfsd/src/glusterfsd-messages.h | 3 +- + glusterfsd/src/glusterfsd-mgmt.c | 238 ++++++- + glusterfsd/src/glusterfsd.c | 18 - + libglusterfs/src/defaults-tmpl.c | 19 +- + libglusterfs/src/glusterfs/glusterfs.h | 7 + + libglusterfs/src/glusterfs/libglusterfs-messages.h | 4 +- + libglusterfs/src/glusterfs/xlator.h | 3 + + libglusterfs/src/graph.c | 451 +++++++++++++ + libglusterfs/src/graph.y | 3 + + libglusterfs/src/libglusterfs.sym | 5 + + libglusterfs/src/statedump.c | 3 +- + libglusterfs/src/xlator.c | 16 + + rpc/rpc-lib/src/protocol-common.h | 2 + + tests/basic/glusterd/heald.t | 49 +- + .../reset-brick-and-daemons-follow-quorum.t | 8 +- + tests/volume.rc | 6 +- + xlators/mgmt/glusterd/src/Makefile.am | 6 +- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 42 ++ + xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h | 4 +- + xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-handler.c | 11 +- + xlators/mgmt/glusterd/src/glusterd-handshake.c | 21 + + xlators/mgmt/glusterd/src/glusterd-mem-types.h | 1 + + xlators/mgmt/glusterd/src/glusterd-messages.h | 4 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 84 ++- + .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 140 ++++ + .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 45 ++ + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 540 ++++++++++++++-- + xlators/mgmt/glusterd/src/glusterd-shd-svc.h | 17 +- + xlators/mgmt/glusterd/src/glusterd-sm.c | 12 +- + xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-statedump.c | 3 - + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 715 ++++++++++++++++++++- + xlators/mgmt/glusterd/src/glusterd-svc-helper.h | 40 +- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 246 +++++-- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 27 + + xlators/mgmt/glusterd/src/glusterd-tier.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-tierd-svc.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 137 +++- + xlators/mgmt/glusterd/src/glusterd-utils.h | 4 + + xlators/mgmt/glusterd/src/glusterd-volgen.c | 60 +- + xlators/mgmt/glusterd/src/glusterd-volgen.h | 11 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 8 +- + xlators/mgmt/glusterd/src/glusterd.c | 12 +- + xlators/mgmt/glusterd/src/glusterd.h | 30 +- + xlators/protocol/client/src/client.c | 31 +- + 47 files changed, 2810 insertions(+), 291 deletions(-) + create mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c + create mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h + +diff --git 
a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h +index 602cd9e..94312a5 100644 +--- a/glusterfsd/src/glusterfsd-messages.h ++++ b/glusterfsd/src/glusterfsd-messages.h +@@ -35,6 +35,7 @@ GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3, + glusterfsd_msg_28, glusterfsd_msg_29, glusterfsd_msg_30, + glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33, + glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36, +- glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39); ++ glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39, ++ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42); + + #endif /* !_GLUSTERFSD_MESSAGES_H_ */ +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index a6c3db5..a89c980 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -48,7 +48,20 @@ int + glusterfs_graph_unknown_options(glusterfs_graph_t *graph); + int + emancipate(glusterfs_ctx_t *ctx, int ret); ++int ++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, ++ char *volfile_id, char *checksum); ++int ++glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, ++ gf_volfile_t *volfile_obj, char *checksum); ++int ++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, ++ char *volfile_id, char *checksum); ++int ++glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj); + ++gf_boolean_t ++mgmt_is_multiplexed_daemon(char *name); + int + mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data) + { +@@ -62,6 +75,96 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data) + } + + int ++mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id) ++{ ++ glusterfs_ctx_t *ctx = NULL; ++ int ret = 0; ++ FILE *tmpfp = NULL; ++ gf_volfile_t *volfile_obj = NULL; ++ gf_volfile_t *volfile_tmp = NULL; ++ char sha256_hash[SHA256_DIGEST_LENGTH] = { ++ 0, ++ }; ++ int tmp_fd = -1; ++ char template[] = "/tmp/glfs.volfile.XXXXXX"; ++ ++ glusterfs_compute_sha256((const unsigned char *)volfile, size, sha256_hash); ++ ctx = THIS->ctx; ++ LOCK(&ctx->volfile_lock); ++ { ++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) ++ { ++ if (!strcmp(volfile_id, volfile_obj->vol_id)) { ++ if (!memcmp(sha256_hash, volfile_obj->volfile_checksum, ++ sizeof(volfile_obj->volfile_checksum))) { ++ UNLOCK(&ctx->volfile_lock); ++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_40, ++ "No change in volfile, continuing"); ++ goto out; ++ } ++ volfile_tmp = volfile_obj; ++ break; ++ } ++ } ++ ++ /* coverity[secure_temp] mkstemp uses 0600 as the mode */ ++ tmp_fd = mkstemp(template); ++ if (-1 == tmp_fd) { ++ UNLOCK(&ctx->volfile_lock); ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_39, ++ "Unable to create temporary file: %s", template); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Calling unlink so that when the file is closed or program ++ * terminates the temporary file is deleted. 
++ */ ++ ret = sys_unlink(template); ++ if (ret < 0) { ++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_39, ++ "Unable to delete temporary file: %s", template); ++ ret = 0; ++ } ++ ++ tmpfp = fdopen(tmp_fd, "w+b"); ++ if (!tmpfp) { ++ ret = -1; ++ goto unlock; ++ } ++ ++ fwrite(volfile, size, 1, tmpfp); ++ fflush(tmpfp); ++ if (ferror(tmpfp)) { ++ ret = -1; ++ goto unlock; ++ } ++ ++ if (!volfile_tmp) { ++ /* There is no checksum in the list, which means simple attach ++ * the volfile ++ */ ++ ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id, ++ sha256_hash); ++ goto unlock; ++ } ++ ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj, ++ sha256_hash); ++ if (ret < 0) { ++ gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!"); ++ } ++ } ++unlock: ++ UNLOCK(&ctx->volfile_lock); ++out: ++ if (tmpfp) ++ fclose(tmpfp); ++ else if (tmp_fd != -1) ++ sys_close(tmp_fd); ++ return ret; ++} ++ ++int + mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data) + { + return 0; +@@ -966,6 +1069,110 @@ glusterfs_handle_attach(rpcsvc_request_t *req) + } + + int ++glusterfs_handle_svc_attach(rpcsvc_request_t *req) ++{ ++ int32_t ret = -1; ++ gd1_mgmt_brick_op_req xlator_req = { ++ 0, ++ }; ++ xlator_t *this = NULL; ++ glusterfs_ctx_t *ctx = NULL; ++ ++ GF_ASSERT(req); ++ this = THIS; ++ GF_ASSERT(this); ++ ++ ctx = this->ctx; ++ ret = xdr_to_generic(req->msg[0], &xlator_req, ++ (xdrproc_t)xdr_gd1_mgmt_brick_op_req); ++ ++ if (ret < 0) { ++ /*failed to decode msg;*/ ++ req->rpc_err = GARBAGE_ARGS; ++ goto out; ++ } ++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41, ++ "received attach " ++ "request for volfile-id=%s", ++ xlator_req.name); ++ ret = 0; ++ ++ if (ctx->active) { ++ ret = mgmt_process_volfile(xlator_req.input.input_val, ++ xlator_req.input.input_len, xlator_req.name); ++ } else { ++ gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, ++ "got attach for %s but no active graph", xlator_req.name); ++ } ++out: ++ if (xlator_req.input.input_val) ++ free(xlator_req.input.input_val); ++ if (xlator_req.name) ++ free(xlator_req.name); ++ glusterfs_translator_info_response_send(req, ret, NULL, NULL); ++ return 0; ++} ++ ++int ++glusterfs_handle_svc_detach(rpcsvc_request_t *req) ++{ ++ gd1_mgmt_brick_op_req xlator_req = { ++ 0, ++ }; ++ ssize_t ret; ++ glusterfs_ctx_t *ctx = NULL; ++ gf_volfile_t *volfile_obj = NULL; ++ gf_volfile_t *volfile_tmp = NULL; ++ ++ ret = xdr_to_generic(req->msg[0], &xlator_req, ++ (xdrproc_t)xdr_gd1_mgmt_brick_op_req); ++ if (ret < 0) { ++ req->rpc_err = GARBAGE_ARGS; ++ return -1; ++ } ++ ctx = glusterfsd_ctx; ++ ++ LOCK(&ctx->volfile_lock); ++ { ++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) ++ { ++ if (!strcmp(xlator_req.name, volfile_obj->vol_id)) { ++ volfile_tmp = volfile_obj; ++ break; ++ } ++ } ++ ++ if (!volfile_tmp) { ++ UNLOCK(&ctx->volfile_lock); ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_41, ++ "can't detach %s - not found", xlator_req.name); ++ /* ++ * Used to be -ENOENT. However, the caller asked us to ++ * make sure it's down and if it's already down that's ++ * good enough. ++ */ ++ ret = 0; ++ goto out; ++ } ++ ret = glusterfs_process_svc_detach(ctx, volfile_tmp); ++ if (ret) { ++ UNLOCK(&ctx->volfile_lock); ++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, glusterfsd_msg_41, ++ "Could not detach " ++ "old graph. 
Aborting the reconfiguration operation"); ++ goto out; ++ } ++ } ++ UNLOCK(&ctx->volfile_lock); ++out: ++ glusterfs_terminate_response_send(req, ret); ++ free(xlator_req.name); ++ xlator_req.name = NULL; ++ ++ return 0; ++} ++ ++int + glusterfs_handle_dump_metrics(rpcsvc_request_t *req) + { + int32_t ret = -1; +@@ -1849,6 +2056,13 @@ rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = { + + [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", GLUSTERD_DUMP_METRICS, + glusterfs_handle_dump_metrics, NULL, 0, DRC_NA}, ++ ++ [GLUSTERD_SVC_ATTACH] = {"ATTACH CLIENT", GLUSTERD_SVC_ATTACH, ++ glusterfs_handle_svc_attach, NULL, 0, DRC_NA}, ++ ++ [GLUSTERD_SVC_DETACH] = {"DETACH CLIENT", GLUSTERD_SVC_DETACH, ++ glusterfs_handle_svc_detach, NULL, 0, DRC_NA}, ++ + }; + + struct rpcsvc_program glusterfs_mop_prog = { +@@ -1996,14 +2210,17 @@ mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count, + } + + volfile: +- ret = 0; + size = rsp.op_ret; ++ volfile_id = frame->local; ++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) { ++ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id); ++ goto post_graph_mgmt; ++ } + ++ ret = 0; + glusterfs_compute_sha256((const unsigned char *)rsp.spec, size, + sha256_hash); + +- volfile_id = frame->local; +- + LOCK(&ctx->volfile_lock); + { + locked = 1; +@@ -2105,6 +2322,7 @@ volfile: + } + + INIT_LIST_HEAD(&volfile_tmp->volfile_list); ++ volfile_tmp->graph = ctx->active; + list_add(&volfile_tmp->volfile_list, &ctx->volfile_list); + snprintf(volfile_tmp->vol_id, sizeof(volfile_tmp->vol_id), "%s", + volfile_id); +@@ -2116,6 +2334,7 @@ volfile: + + locked = 0; + ++post_graph_mgmt: + if (!is_mgmt_rpc_reconnect) { + need_emancipate = 1; + glusterfs_mgmt_pmap_signin(ctx); +@@ -2269,10 +2488,21 @@ glusterfs_volfile_fetch(glusterfs_ctx_t *ctx) + { + xlator_t *server_xl = NULL; + xlator_list_t *trav; +- int ret; ++ gf_volfile_t *volfile_obj = NULL; ++ int ret = 0; + + LOCK(&ctx->volfile_lock); + { ++ if (ctx->active && ++ mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) { ++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) ++ { ++ ret |= glusterfs_volfile_fetch_one(ctx, volfile_obj->vol_id); ++ } ++ UNLOCK(&ctx->volfile_lock); ++ return ret; ++ } ++ + if (ctx->active) { + server_xl = ctx->active->first; + if (strcmp(server_xl->type, "protocol/server") != 0) { +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index c983882..3aa89ca 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -2589,24 +2589,6 @@ out: + #endif + + int +-glusterfs_graph_fini(glusterfs_graph_t *graph) +-{ +- xlator_t *trav = NULL; +- +- trav = graph->first; +- +- while (trav) { +- if (trav->init_succeeded) { +- trav->fini(trav); +- trav->init_succeeded = 0; +- } +- trav = trav->next; +- } +- +- return 0; +-} +- +-int + glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp) + { + glusterfs_graph_t *graph = NULL; +diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c +index 5bf64e8..82e7f78 100644 +--- a/libglusterfs/src/defaults-tmpl.c ++++ b/libglusterfs/src/defaults-tmpl.c +@@ -127,6 +127,12 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) 
+ GF_UNUSED int ret = 0; + xlator_t *victim = data; + ++ glusterfs_graph_t *graph = NULL; ++ ++ GF_VALIDATE_OR_GOTO("notify", this, out); ++ graph = this->graph; ++ GF_VALIDATE_OR_GOTO(this->name, graph, out); ++ + switch (event) { + case GF_EVENT_PARENT_UP: + case GF_EVENT_PARENT_DOWN: { +@@ -159,6 +165,17 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) + xlator_notify(parent->xlator, event, this, NULL); + parent = parent->next; + } ++ ++ if (event == GF_EVENT_CHILD_DOWN && ++ !(this->ctx && this->ctx->master) && (graph->top == this)) { ++ /* Make sure this is not a daemon with master xlator */ ++ pthread_mutex_lock(&graph->mutex); ++ { ++ graph->used = 0; ++ pthread_cond_broadcast(&graph->child_down_cond); ++ } ++ pthread_mutex_unlock(&graph->mutex); ++ } + } break; + case GF_EVENT_UPCALL: { + xlator_list_t *parent = this->parents; +@@ -205,7 +222,7 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) + * nothing to do with readability. + */ + } +- ++out: + return 0; + } + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 7c6af09..deec5ba 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -590,6 +590,10 @@ struct _glusterfs_graph { + int used; /* Should be set when fuse gets + first CHILD_UP */ + uint32_t volfile_checksum; ++ void *last_xl; /* Stores the last xl of the graph, as of now only populated ++ in client multiplexed code path */ ++ pthread_mutex_t mutex; ++ pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */ + }; + typedef struct _glusterfs_graph glusterfs_graph_t; + +@@ -732,6 +736,7 @@ typedef struct { + char volfile_checksum[SHA256_DIGEST_LENGTH]; + char vol_id[NAME_MAX + 1]; + struct list_head volfile_list; ++ glusterfs_graph_t *graph; + + } gf_volfile_t; + +@@ -815,4 +820,6 @@ gf_free_mig_locks(lock_migration_info_t *locks); + + int + glusterfs_read_secure_access_file(void); ++int ++glusterfs_graph_fini(glusterfs_graph_t *graph); + #endif /* _GLUSTERFS_H */ +diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h +index 1b72f6d..ea2aa60 100644 +--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h ++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h +@@ -109,6 +109,8 @@ GLFS_MSGID( + LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST, + LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED, + LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG, +- LG_MSG_XXH64_TO_GFID_FAILED); ++ LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE, ++ LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED, ++ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED); + + #endif /* !_LG_MESSAGES_H_ */ +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 7002657..06152ec 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1087,4 +1087,7 @@ handle_default_options(xlator_t *xl, dict_t *options); + + void + gluster_graph_take_reference(xlator_t *tree); ++ ++gf_boolean_t ++mgmt_is_multiplexed_daemon(char *name); + #endif /* _XLATOR_H */ +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index bb5e67a..a492dd8 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -114,6 +114,53 @@ out: + return cert_depth; + } + ++xlator_t * ++glusterfs_get_last_xlator(glusterfs_graph_t *graph) ++{ ++ xlator_t *trav = 
graph->first; ++ if (!trav) ++ return NULL; ++ ++ while (trav->next) ++ trav = trav->next; ++ ++ return trav; ++} ++ ++xlator_t * ++glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl) ++{ ++ xlator_list_t *unlink = NULL; ++ xlator_list_t *prev = NULL; ++ xlator_list_t **tmp = NULL; ++ xlator_t *next_child = NULL; ++ xlator_t *xl = NULL; ++ ++ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) { ++ if ((*tmp)->xlator == cxl) { ++ unlink = *tmp; ++ *tmp = (*tmp)->next; ++ if (*tmp) ++ next_child = (*tmp)->xlator; ++ break; ++ } ++ prev = *tmp; ++ } ++ ++ if (!prev) ++ xl = pxl; ++ else if (prev->xlator) ++ xl = prev->xlator->graph->last_xl; ++ ++ if (xl) ++ xl->next = next_child; ++ if (next_child) ++ next_child->prev = xl; ++ ++ GF_FREE(unlink); ++ return next_child; ++} ++ + int + glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl) + { +@@ -1092,6 +1139,8 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph) + ret = xlator_tree_free_memacct(graph->first); + + list_del_init(&graph->list); ++ pthread_mutex_destroy(&graph->mutex); ++ pthread_cond_destroy(&graph->child_down_cond); + GF_FREE(graph); + + return ret; +@@ -1134,6 +1183,25 @@ out: + } + + int ++glusterfs_graph_fini(glusterfs_graph_t *graph) ++{ ++ xlator_t *trav = NULL; ++ ++ trav = graph->first; ++ ++ while (trav) { ++ if (trav->init_succeeded) { ++ trav->cleanup_starting = 1; ++ trav->fini(trav); ++ trav->init_succeeded = 0; ++ } ++ trav = trav->next; ++ } ++ ++ return 0; ++} ++ ++int + glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path, + glusterfs_graph_t **newgraph) + { +@@ -1256,3 +1324,386 @@ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path, + + return 0; + } ++int ++glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx, ++ glusterfs_graph_t *parent_graph) ++{ ++ if (parent_graph) { ++ if (parent_graph->first) { ++ xlator_destroy(parent_graph->first); ++ } ++ ctx->active = NULL; ++ GF_FREE(parent_graph); ++ parent_graph = NULL; ++ } ++ return 0; ++} ++ ++void * ++glusterfs_graph_cleanup(void *arg) ++{ ++ glusterfs_graph_t *graph = NULL; ++ glusterfs_ctx_t *ctx = THIS->ctx; ++ int ret = -1; ++ graph = arg; ++ ++ if (!graph) ++ return NULL; ++ ++ /* To destroy the graph, fitst sent a GF_EVENT_PARENT_DOWN ++ * Then wait for GF_EVENT_CHILD_DOWN to get on the top ++ * xl. Once we have GF_EVENT_CHILD_DOWN event, then proceed ++ * to fini. ++ * ++ * During fini call, this will take a last unref on rpc and ++ * rpc_transport_object. ++ */ ++ if (graph->first) ++ default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first); ++ ++ ret = pthread_mutex_lock(&graph->mutex); ++ if (ret != 0) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED, ++ "Failed to aquire a lock"); ++ goto out; ++ } ++ /* check and wait for CHILD_DOWN for top xlator*/ ++ while (graph->used) { ++ ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex); ++ if (ret != 0) ++ gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED, ++ "cond wait failed "); ++ } ++ ++ ret = pthread_mutex_unlock(&graph->mutex); ++ if (ret != 0) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED, ++ "Failed to release a lock"); ++ } ++ ++ /* Though we got a child down on top xlator, we have to wait until ++ * all the notifier to exit. Because there should not be any threads ++ * that access xl variables. 
++ */ ++ pthread_mutex_lock(&ctx->notify_lock); ++ { ++ while (ctx->notifying) ++ pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock); ++ } ++ pthread_mutex_unlock(&ctx->notify_lock); ++ ++ glusterfs_graph_fini(graph); ++ glusterfs_graph_destroy(graph); ++out: ++ return NULL; ++} ++ ++glusterfs_graph_t * ++glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name, ++ char *type) ++{ ++ glusterfs_graph_t *parent_graph = NULL; ++ xlator_t *ixl = NULL; ++ int ret = -1; ++ parent_graph = GF_CALLOC(1, sizeof(*parent_graph), ++ gf_common_mt_glusterfs_graph_t); ++ if (!parent_graph) ++ goto out; ++ ++ INIT_LIST_HEAD(&parent_graph->list); ++ ++ ctx->active = parent_graph; ++ ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t); ++ if (!ixl) ++ goto out; ++ ++ ixl->ctx = ctx; ++ ixl->graph = parent_graph; ++ ixl->options = dict_new(); ++ if (!ixl->options) ++ goto out; ++ ++ ixl->name = gf_strdup(name); ++ if (!ixl->name) ++ goto out; ++ ++ ixl->is_autoloaded = 1; ++ ++ if (xlator_set_type(ixl, type) == -1) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, ++ "%s (%s) set type failed", name, type); ++ goto out; ++ } ++ ++ glusterfs_graph_set_first(parent_graph, ixl); ++ parent_graph->top = ixl; ++ ixl = NULL; ++ ++ gettimeofday(&parent_graph->dob, NULL); ++ fill_uuid(parent_graph->graph_uuid, 128); ++ parent_graph->id = ctx->graph_id++; ++ ret = 0; ++out: ++ if (ixl) ++ xlator_destroy(ixl); ++ ++ if (ret) { ++ glusterfs_muxsvc_cleanup_parent(ctx, parent_graph); ++ parent_graph = NULL; ++ } ++ return parent_graph; ++} ++ ++int ++glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) ++{ ++ xlator_t *last_xl = NULL; ++ glusterfs_graph_t *graph = NULL; ++ glusterfs_graph_t *parent_graph = NULL; ++ pthread_t clean_graph = { ++ 0, ++ }; ++ int ret = -1; ++ xlator_t *xl = NULL; ++ ++ if (!ctx || !ctx->active || !volfile_obj) ++ goto out; ++ parent_graph = ctx->active; ++ graph = volfile_obj->graph; ++ if (graph && graph->first) ++ xl = graph->first; ++ ++ last_xl = graph->last_xl; ++ if (last_xl) ++ last_xl->next = NULL; ++ if (!xl || xl->cleanup_starting) ++ goto out; ++ ++ xl->cleanup_starting = 1; ++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, ++ "detaching child %s", volfile_obj->vol_id); ++ ++ list_del_init(&volfile_obj->volfile_list); ++ glusterfs_mux_xlator_unlink(parent_graph->top, xl); ++ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); ++ parent_graph->xl_count -= graph->xl_count; ++ parent_graph->leaf_count -= graph->leaf_count; ++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl); ++ parent_graph->id++; ++ ret = 0; ++out: ++ if (!ret) { ++ list_del_init(&volfile_obj->volfile_list); ++ if (graph) { ++ ret = gf_thread_create_detached( ++ &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean"); ++ if (ret) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, ++ LG_MSG_GRAPH_CLEANUP_FAILED, ++ "%s failed to create clean " ++ "up thread", ++ volfile_obj->vol_id); ++ ret = 0; ++ } ++ } ++ GF_FREE(volfile_obj); ++ } ++ return ret; ++} ++ ++int ++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, ++ char *volfile_id, char *checksum) ++{ ++ glusterfs_graph_t *graph = NULL; ++ glusterfs_graph_t *parent_graph = NULL; ++ glusterfs_graph_t *clean_graph = NULL; ++ int ret = -1; ++ xlator_t *xl = NULL; ++ xlator_t *last_xl = NULL; ++ gf_volfile_t *volfile_obj = NULL; ++ pthread_t thread_id = { ++ 0, ++ }; ++ ++ if (!ctx) ++ goto out; ++ parent_graph = ctx->active; ++ graph = 
glusterfs_graph_construct(fp); ++ if (!graph) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, ++ "failed to construct the graph"); ++ goto out; ++ } ++ graph->last_xl = glusterfs_get_last_xlator(graph); ++ ++ for (xl = graph->first; xl; xl = xl->next) { ++ if (strcmp(xl->type, "mount/fuse") == 0) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, ++ LG_MSG_GRAPH_ATTACH_FAILED, ++ "fuse xlator cannot be specified in volume file"); ++ goto out; ++ } ++ } ++ ++ graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph)); ++ xl = graph->first; ++ /* TODO memory leaks everywhere need to free graph in case of error */ ++ if (glusterfs_graph_prepare(graph, ctx, xl->name)) { ++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, ++ "failed to prepare graph for xlator %s", xl->name); ++ ret = -1; ++ goto out; ++ } else if (glusterfs_graph_init(graph)) { ++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, ++ "failed to initialize graph for xlator %s", xl->name); ++ ret = -1; ++ goto out; ++ } else if (glusterfs_graph_parent_up(graph)) { ++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, ++ "failed to link the graphs for xlator %s ", xl->name); ++ ret = -1; ++ goto out; ++ } ++ ++ if (!parent_graph) { ++ parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd", ++ "debug/io-stats"); ++ if (!parent_graph) ++ goto out; ++ ((xlator_t *)parent_graph->top)->next = xl; ++ clean_graph = parent_graph; ++ } else { ++ last_xl = parent_graph->last_xl; ++ if (last_xl) ++ last_xl->next = xl; ++ xl->prev = last_xl; ++ } ++ parent_graph->last_xl = graph->last_xl; ++ ++ ret = glusterfs_xlator_link(parent_graph->top, xl); ++ if (ret) { ++ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED, ++ "parent up notification failed"); ++ goto out; ++ } ++ parent_graph->xl_count += graph->xl_count; ++ parent_graph->leaf_count += graph->leaf_count; ++ parent_graph->id++; ++ ++ if (!volfile_obj) { ++ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); ++ if (!volfile_obj) { ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ graph->used = 1; ++ parent_graph->id++; ++ list_add(&graph->list, &ctx->graphs); ++ INIT_LIST_HEAD(&volfile_obj->volfile_list); ++ volfile_obj->graph = graph; ++ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s", ++ volfile_id); ++ memcpy(volfile_obj->volfile_checksum, checksum, ++ sizeof(volfile_obj->volfile_checksum)); ++ list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list); ++ ++ gf_log_dump_graph(fp, graph); ++ graph = NULL; ++ ++ ret = 0; ++out: ++ if (ret) { ++ if (graph) { ++ gluster_graph_take_reference(graph->first); ++ ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup, ++ graph, "graph_clean"); ++ if (ret) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, ++ LG_MSG_GRAPH_CLEANUP_FAILED, ++ "%s failed to create clean " ++ "up thread", ++ volfile_id); ++ ret = 0; ++ } ++ } ++ if (clean_graph) ++ glusterfs_muxsvc_cleanup_parent(ctx, clean_graph); ++ } ++ return ret; ++} ++ ++int ++glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, ++ gf_volfile_t *volfile_obj, char *checksum) ++{ ++ glusterfs_graph_t *oldvolfile_graph = NULL; ++ glusterfs_graph_t *newvolfile_graph = NULL; ++ ++ int ret = -1; ++ ++ if (!ctx) { ++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL, ++ "ctx is NULL"); ++ goto out; ++ } ++ ++ /* Change the message id */ ++ if (!volfile_obj) { ++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, 
LG_MSG_CTX_NULL, ++ "failed to get volfile object"); ++ goto out; ++ } ++ ++ oldvolfile_graph = volfile_obj->graph; ++ if (!oldvolfile_graph) { ++ goto out; ++ } ++ ++ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp); ++ ++ if (!newvolfile_graph) { ++ goto out; ++ } ++ newvolfile_graph->last_xl = glusterfs_get_last_xlator(newvolfile_graph); ++ ++ glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first); ++ ++ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) { ++ ret = glusterfs_process_svc_detach(ctx, volfile_obj); ++ if (ret) { ++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, ++ LG_MSG_GRAPH_CLEANUP_FAILED, ++ "Could not detach " ++ "old graph. Aborting the reconfiguration operation"); ++ goto out; ++ } ++ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, ++ volfile_obj->vol_id, checksum); ++ goto out; ++ } ++ ++ gf_msg_debug("glusterfsd-mgmt", 0, ++ "Only options have changed in the" ++ " new graph"); ++ ++ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph); ++ if (ret) { ++ gf_msg_debug("glusterfsd-mgmt", 0, ++ "Could not reconfigure " ++ "new options in old graph"); ++ goto out; ++ } ++ memcpy(volfile_obj->volfile_checksum, checksum, ++ sizeof(volfile_obj->volfile_checksum)); ++ ++ ret = 0; ++out: ++ ++ if (newvolfile_graph) ++ glusterfs_graph_destroy(newvolfile_graph); ++ ++ return ret; ++} +diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y +index 5b92985..c60ff38 100644 +--- a/libglusterfs/src/graph.y ++++ b/libglusterfs/src/graph.y +@@ -542,6 +542,9 @@ glusterfs_graph_new () + + INIT_LIST_HEAD (&graph->list); + ++ pthread_mutex_init(&graph->mutex, NULL); ++ pthread_cond_init(&graph->child_down_cond, NULL); ++ + gettimeofday (&graph->dob, NULL); + + return graph; +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index e33d5cf..fa2025e 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1154,3 +1154,8 @@ gf_changelog_register_generic + gf_gfid_generate_from_xxh64 + find_xlator_option_in_cmd_args_t + gf_d_type_from_ia_type ++glusterfs_graph_fini ++glusterfs_process_svc_attach_volfp ++glusterfs_mux_volfile_reconfigure ++glusterfs_process_svc_detach ++mgmt_is_multiplexed_daemon +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index d18b50f..0cf80c0 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -810,7 +810,8 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) + if (!ctx) + goto out; + +- if (ctx && ctx->active) { ++ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) && ++ (ctx && ctx->active)) { + top = ctx->active->first; + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + brick_count++; +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index dc1e887..5d6f8d2 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1463,3 +1463,19 @@ gluster_graph_take_reference(xlator_t *tree) + } + return; + } ++ ++gf_boolean_t ++mgmt_is_multiplexed_daemon(char *name) ++{ ++ const char *mux_daemons[] = {"glustershd", NULL}; ++ int i; ++ ++ if (!name) ++ return _gf_false; ++ ++ for (i = 0; mux_daemons[i]; i++) { ++ if (!strcmp(name, mux_daemons[i])) ++ return _gf_true; ++ } ++ return _gf_false; ++} +diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h +index 779878f..7275d75 100644 +--- a/rpc/rpc-lib/src/protocol-common.h ++++ b/rpc/rpc-lib/src/protocol-common.h +@@ -245,6 
+245,8 @@ enum glusterd_brick_procnum { + GLUSTERD_NODE_BITROT, + GLUSTERD_BRICK_ATTACH, + GLUSTERD_DUMP_METRICS, ++ GLUSTERD_SVC_ATTACH, ++ GLUSTERD_SVC_DETACH, + GLUSTERD_BRICK_MAXVALUE, + }; + +diff --git a/tests/basic/glusterd/heald.t b/tests/basic/glusterd/heald.t +index ca112ad..7dae3c3 100644 +--- a/tests/basic/glusterd/heald.t ++++ b/tests/basic/glusterd/heald.t +@@ -7,11 +7,16 @@ + # Covers enable/disable at the moment. Will be enhanced later to include + # the other commands as well. + ++function is_pid_running { ++ local pid=$1 ++ num=`ps auxww | grep glustershd | grep $pid | grep -v grep | wc -l` ++ echo $num ++} ++ + cleanup; + TEST glusterd + TEST pidof glusterd + +-volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol" + #Commands should fail when volume doesn't exist + TEST ! $CLI volume heal non-existent-volume enable + TEST ! $CLI volume heal non-existent-volume disable +@@ -20,51 +25,55 @@ TEST ! $CLI volume heal non-existent-volume disable + # volumes + TEST $CLI volume create dist $H0:$B0/dist + TEST $CLI volume start dist +-TEST "[ -z $(get_shd_process_pid)]" ++TEST "[ -z $(get_shd_process_pid dist)]" + TEST ! $CLI volume heal dist enable + TEST ! $CLI volume heal dist disable + + # Commands should work on replicate/disperse volume. + TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1 +-TEST "[ -z $(get_shd_process_pid)]" ++TEST "[ -z $(get_shd_process_pid r2)]" + TEST $CLI volume start r2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2 + TEST $CLI volume heal r2 enable + EXPECT "enable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++volfiler2=$(gluster system:: getwd)"/vols/r2/r2-shd.vol" ++EXPECT "enable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2 ++pid=$( get_shd_process_pid r2 ) + TEST $CLI volume heal r2 disable + EXPECT "disable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++EXPECT "disable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT "1" is_pid_running $pid + + # Commands should work on disperse volume. 
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2 + TEST $CLI volume start ec2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2 + TEST $CLI volume heal ec2 enable + EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++volfileec2=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol" ++EXPECT "enable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2 ++pid=$(get_shd_process_pid ec2) + TEST $CLI volume heal ec2 disable + EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++EXPECT "disable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT "1" is_pid_running $pid + + #Check that shd graph is rewritten correctly on volume stop/start +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse ++ ++EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate + TEST $CLI volume stop r2 +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse + TEST $CLI volume stop ec2 + # When both the volumes are stopped glustershd volfile is not modified just the + # process is stopped +-TEST "[ -z $(get_shd_process_pid) ]" ++TEST "[ -z $(get_shd_process_pid dist) ]" ++TEST "[ -z $(get_shd_process_pid ec2) ]" + + TEST $CLI volume start r2 +-EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate + + TEST $CLI volume set r2 self-heal-daemon on + TEST $CLI volume set r2 cluster.self-heal-daemon off +diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t +index cdb1a33..e6e65c4 100644 +--- a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t ++++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t +@@ -55,9 +55,9 @@ TEST kill_glusterd 1 + #Bring back 1st glusterd + TEST $glusterd_1 + +-# We need to wait till PROCESS_UP_TIMEOUT and then check shd service does not +-# come up on node 2 +-sleep $PROCESS_UP_TIMEOUT +-EXPECT "N" shd_up_status_2 ++# We need to wait till PROCESS_UP_TIMEOUT and then check shd service started ++#on node 2, because once glusterd regains quorum, it will restart all volume ++#level daemons ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2 + + cleanup; +diff --git a/tests/volume.rc b/tests/volume.rc +index 87ca958..289b197 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -237,11 +237,13 @@ function ec_child_up_count_shd { + } + + function get_shd_process_pid { +- ps auxww | grep glusterfs | grep -E 
"glustershd/glustershd.pid" | awk '{print $2}' | head -1 ++ local vol=$1 ++ ps auxww | grep "process-name\ glustershd" | awk '{print $2}' | head -1 + } + + function generate_shd_statedump { +- generate_statedump $(get_shd_process_pid) ++ local vol=$1 ++ generate_statedump $(get_shd_process_pid $vol) + } + + function generate_nfs_statedump { +diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am +index 5fe5156..11ae189 100644 +--- a/xlators/mgmt/glusterd/src/Makefile.am ++++ b/xlators/mgmt/glusterd/src/Makefile.am +@@ -18,11 +18,12 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ + glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ + glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \ + glusterd-snapshot-utils.c glusterd-conn-mgmt.c \ +- glusterd-proc-mgmt.c glusterd-svc-mgmt.c glusterd-shd-svc.c \ ++ glusterd-proc-mgmt.c glusterd-svc-mgmt.c \ + glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \ + glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \ + glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \ + glusterd-reset-brick.c glusterd-tierd-svc.c glusterd-tierd-svc-helper.c \ ++ glusterd-shd-svc.c glusterd-shd-svc-helper.c \ + glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c + + +@@ -38,11 +39,12 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ + glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \ + glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \ + glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \ +- glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \ ++ glusterd-svc-mgmt.h glusterd-nfs-svc.h \ + glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \ + glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \ + glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \ + glusterd-tierd-svc.h glusterd-tierd-svc-helper.h \ ++ glusterd-shd-svc.h glusterd-shd-svc-helper.h \ + glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \ + $(CONTRIBDIR)/userspace-rcu/rculist-extra.h + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index ad9a572..042a805 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2863,7 +2863,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr) + } + + if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL, + "Unable to reconfigure NFS-Server"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +index e80e152..052438c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +@@ -132,3 +132,45 @@ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath, + glusterd_set_socket_filepath(sockfilepath, socketpath, len); + return 0; + } ++ ++int ++__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, ++ rpc_clnt_event_t event, void *data) ++{ ++ glusterd_conf_t *conf = THIS->private; ++ glusterd_svc_proc_t *mux_proc = mydata; ++ int ret = -1; ++ ++ /* Silently ignoring this error, exactly like the current ++ * implementation */ ++ if (!mux_proc) ++ return 0; ++ ++ if (event == 
RPC_CLNT_DESTROY) {
++ /* RPC_CLNT_DESTROY will only be called after mux_proc is detached from
++ * the list, so it is safe to call without a lock. Processing
++ * RPC_CLNT_DESTROY under a lock would lead to a deadlock.
++ */
++ if (mux_proc->data) {
++ glusterd_volinfo_unref(mux_proc->data);
++ mux_proc->data = NULL;
++ }
++ GF_FREE(mux_proc);
++ ret = 0;
++ } else {
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ ret = mux_proc->notify(mux_proc, event);
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
++ }
++ return ret;
++}
++
++int
++glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
++ rpc_clnt_event_t event, void *data)
++{
++ return glusterd_big_locked_notify(rpc, mydata, event, data,
++ __glusterd_muxsvc_conn_common_notify);
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
+index 602c0ba..d1c4607 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
+@@ -43,9 +43,11 @@ glusterd_conn_disconnect(glusterd_conn_t *conn);
+ int
+ glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
++int
++glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
++ rpc_clnt_event_t event, void *data);
+ 
+ int32_t
+ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
+ int len);
+-
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+index f9c8617..b01fd4d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+@@ -370,6 +370,7 @@ int
+ glusterd_gfproxydsvc_restart()
+ {
+ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_volinfo_t *tmp = NULL;
+ int ret = -1;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+@@ -380,7 +381,7 @@ glusterd_gfproxydsvc_restart()
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+ 
+- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
+ {
+ /* Start per volume gfproxyd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 528993c..1cb9013 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -5928,6 +5928,11 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+ 
+ GF_FREE(rebal_data);
+ 
++ fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count,
++ volinfo->shd.svc.online ? "Online" : "Offline");
++ fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count,
++ volinfo->shd.svc.inited ? "True" : "False");
++
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ ret = glusterd_volume_get_hot_tier_type_str(volinfo,
+ &hot_tier_type_str);
+@@ -5997,12 +6002,6 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+ 
+ fprintf(fp, "\n[Services]\n");
+ 
+- if (priv->shd_svc.inited) {
+- fprintf(fp, "svc%d.name: %s\n", ++count, priv->shd_svc.name);
+- fprintf(fp, "svc%d.online_status: %s\n\n", count,
+- priv->shd_svc.online ? 
"Online" : "Offline"); +- } +- + if (priv->nfs_svc.inited) { + fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name); + fprintf(fp, "svc%d.online_status: %s\n\n", count, +diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c +index 5599a63..1ba58c3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c +@@ -30,6 +30,7 @@ + #include "rpcsvc.h" + #include "rpc-common-xdr.h" + #include "glusterd-gfproxyd-svc-helper.h" ++#include "glusterd-shd-svc-helper.h" + + extern struct rpc_clnt_program gd_peer_prog; + extern struct rpc_clnt_program gd_mgmt_prog; +@@ -328,6 +329,26 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, + goto out; + } + ++ volid_ptr = strstr(volume_id, "shd/"); ++ if (volid_ptr) { ++ volid_ptr = strchr(volid_ptr, '/'); ++ if (!volid_ptr) { ++ ret = -1; ++ goto out; ++ } ++ volid_ptr++; ++ ++ ret = glusterd_volinfo_find(volid_ptr, &volinfo); ++ if (ret == -1) { ++ gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo"); ++ goto out; ++ } ++ ++ glusterd_svc_build_shd_volfile_path(volinfo, path, path_len); ++ ret = 0; ++ goto out; ++ } ++ + volid_ptr = strstr(volume_id, "/snaps/"); + if (volid_ptr) { + ret = get_snap_volname_and_volinfo(volid_ptr, &volname, &volinfo); +diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h +index 7a784db..17052ce 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h ++++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h +@@ -51,6 +51,7 @@ typedef enum gf_gld_mem_types_ { + gf_gld_mt_missed_snapinfo_t, + gf_gld_mt_snap_create_args_t, + gf_gld_mt_glusterd_brick_proc_t, ++ gf_gld_mt_glusterd_svc_proc_t, + gf_gld_mt_end, + } gf_gld_mem_types_t; + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h +index c7b3ca8..424e15f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-messages.h ++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h +@@ -298,6 +298,8 @@ GLFS_MSGID( + GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE, + GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL, + GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED, +- GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL); ++ GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL, ++ GD_MSG_SHD_START_FAIL, GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL, ++ GD_MSG_ATTACH_INFO, GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL); + + #endif /* !_GLUSTERD_MESSAGES_H_ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index df8a6ab..95f9707 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -44,6 +44,7 @@ + #include "glusterd-snapshot-utils.h" + #include "glusterd-svc-mgmt.h" + #include "glusterd-svc-helper.h" ++#include "glusterd-shd-svc-helper.h" + #include "glusterd-shd-svc.h" + #include "glusterd-nfs-svc.h" + #include "glusterd-quotad-svc.h" +@@ -2223,6 +2224,11 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key, + if (ret) + goto out; + ++ svc = &(volinfo->shd.svc); ++ ret = svc->reconfigure(volinfo); ++ if (ret) ++ goto out; ++ + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -2237,7 +2243,7 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char 
*key, + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) + goto out; + } +@@ -2693,6 +2699,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, + if (ret) + goto out; + ++ svc = &(volinfo->shd.svc); ++ ret = svc->reconfigure(volinfo); ++ if (ret) ++ goto out; ++ + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -2706,7 +2717,7 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, + } + } + if (svcs_reconfigure) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart " +@@ -3091,6 +3102,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + if (ret) + goto out; + ++ svc = &(volinfo->shd.svc); ++ ret = svc->reconfigure(volinfo); ++ if (ret) ++ goto out; ++ + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -3106,7 +3122,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart services"); +@@ -3139,6 +3155,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + if (ret) + goto out; + ++ svc = &(volinfo->shd.svc); ++ ret = svc->reconfigure(volinfo); ++ if (ret) ++ goto out; ++ + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -3154,7 +3175,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart services"); +@@ -3361,7 +3382,7 @@ glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) + goto out; + } +@@ -3644,14 +3665,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + other_count++; + node_count++; + +- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { +- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, 0, +- vol_opts); +- if (ret) +- goto out; +- other_count++; +- node_count++; +- + } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) { + ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0, + vol_opts); +@@ -3685,6 +3698,12 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + goto out; + other_count++; + node_count++; ++ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { ++ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index); ++ if (ret) ++ goto out; ++ other_count++; ++ node_count++; + } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick); + if (ret) +@@ -3747,6 +3766,19 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + node_count++; + } + ++ if (glusterd_is_shd_compatible_volume(volinfo)) { ++ shd_enabled = gd_is_self_heal_enabled(volinfo, 
vol_opts); ++ if (shd_enabled) { ++ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, ++ other_index); ++ if (ret) ++ goto out; ++ other_count++; ++ other_index++; ++ node_count++; ++ } ++ } ++ + nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY, + _gf_false); + if (!nfs_disabled) { +@@ -3759,18 +3791,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + node_count++; + } + +- if (glusterd_is_shd_compatible_volume(volinfo)) +- shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts); +- if (shd_enabled) { +- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, +- other_index, vol_opts); +- if (ret) +- goto out; +- other_count++; +- node_count++; +- other_index++; +- } +- + if (glusterd_is_volume_quota_enabled(volinfo)) { + ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, + other_index, vol_opts); +@@ -6875,16 +6895,18 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op, + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; ++ glusterd_svc_t *svc = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); ++ svc = &(volinfo->shd.svc); + + switch (heal_op) { + case GF_SHD_OP_INDEX_SUMMARY: + case GF_SHD_OP_STATISTICS_HEAL_COUNT: +- if (!priv->shd_svc.online) { ++ if (!svc->online) { + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL, + "Received " +@@ -6905,7 +6927,7 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op, + break; + + case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: +- if (!priv->shd_svc.online) { ++ if (!svc->online) { + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL, + "Received " +@@ -7040,7 +7062,7 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr, + ret = -1; + goto out; + } else { +- pending_node->node = &(priv->shd_svc); ++ pending_node->node = &(volinfo->shd.svc); + pending_node->type = GD_NODE_SHD; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; +@@ -7174,6 +7196,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + glusterd_pending_node_t *pending_node = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; ++ glusterd_svc_t *svc = NULL; + + GF_ASSERT(dict); + +@@ -7269,7 +7292,8 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + + ret = 0; + } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { +- if (!priv->shd_svc.online) { ++ svc = &(volinfo->shd.svc); ++ if (!svc->online) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED, + "Self-heal daemon is not running"); +@@ -7281,7 +7305,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + ret = -1; + goto out; + } +- pending_node->node = &(priv->shd_svc); ++ pending_node->node = svc; + pending_node->type = GD_NODE_SHD; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +new file mode 100644 +index 0000000..9196758 +--- /dev/null ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +@@ -0,0 +1,140 @@ ++/* ++ Copyright (c) 2016 Red Hat, Inc. ++ This file is part of GlusterFS. 
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include "glusterd.h"
++#include "glusterd-utils.h"
++#include "glusterd-shd-svc-helper.h"
++#include "glusterd-messages.h"
++#include "glusterd-volgen.h"
++
++void
++glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
++ int path_len)
++{
++ char sockfilepath[PATH_MAX] = {
++ 0,
++ };
++ char rundir[PATH_MAX] = {
++ 0,
++ };
++ int32_t len = 0;
++ glusterd_conf_t *priv = THIS->private;
++
++ if (!priv)
++ return;
++
++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
++ len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir,
++ uuid_utoa(MY_UUID));
++ if ((len < 0) || (len >= sizeof(sockfilepath))) {
++ sockfilepath[0] = 0;
++ }
++
++ glusterd_set_socket_filepath(sockfilepath, path, path_len);
++}
++
++void
++glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
++ int path_len)
++{
++ char rundir[PATH_MAX] = {
++ 0,
++ };
++ glusterd_conf_t *priv = THIS->private;
++
++ if (!priv)
++ return;
++
++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
++
++ snprintf(path, path_len, "%s/%s-shd.pid", rundir, volinfo->volname);
++}
++
++void
++glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
++ int path_len)
++{
++ char workdir[PATH_MAX] = {
++ 0,
++ };
++ glusterd_conf_t *priv = THIS->private;
++
++ if (!priv)
++ return;
++
++ GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv);
++
++ snprintf(path, path_len, "%s/%s-shd.vol", workdir, volinfo->volname);
++}
++
++void
++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len)
++{
++ snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname);
++}
++
++void
++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len)
++{
++ snprintf(logfile, len, "%s/shd.log", logdir);
++}
++
++void
++glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
++{
++ glusterd_svc_proc_t *svc_proc = NULL;
++ glusterd_svc_t *svc = NULL;
++ glusterd_conf_t *conf = NULL;
++ gf_boolean_t need_unref = _gf_false;
++ rpc_clnt_t *rpc = NULL;
++
++ conf = THIS->private;
++ if (!conf)
++ return;
++
++ GF_VALIDATE_OR_GOTO(THIS->name, conf, out);
++ GF_VALIDATE_OR_GOTO(THIS->name, shd, out);
++
++ svc = &shd->svc;
++ shd->attached = _gf_false;
++
++ if (svc->conn.rpc) {
++ rpc_clnt_unref(svc->conn.rpc);
++ svc->conn.rpc = NULL;
++ }
++
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ svc_proc = svc->svc_proc;
++ svc->svc_proc = NULL;
++ svc->inited = _gf_false;
++ cds_list_del_init(&svc->mux_svc);
++ glusterd_unlink_file(svc->proc.pidfile);
++
++ if (svc_proc && cds_list_empty(&svc_proc->svcs)) {
++ cds_list_del_init(&svc_proc->svc_proc_list);
++ /* We cannot free the svc_proc list from here, because
++ * if there are pending events on the rpc, they will
++ * try to access the corresponding svc_proc; so unrefing the
++ * rpc request and then cleaning up the memory is carried out
++ * from the notify function upon RPC_CLNT_DESTROY.
++ */ ++ need_unref = _gf_true; ++ rpc = svc_proc->rpc; ++ svc_proc->rpc = NULL; ++ } ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ /*rpc unref has to be performed outside the lock*/ ++ if (need_unref && rpc) ++ rpc_clnt_unref(rpc); ++out: ++ return; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +new file mode 100644 +index 0000000..c70702c +--- /dev/null ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +@@ -0,0 +1,45 @@ ++/* ++ Copyright (c) 2016 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef _GLUSTERD_SHD_SVC_HELPER_H_ ++#define _GLUSTERD_SHD_SVC_HELPER_H_ ++ ++#include "glusterd.h" ++#include "glusterd-svc-mgmt.h" ++ ++void ++glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path, ++ int path_len); ++ ++void ++glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path, ++ int path_len); ++ ++void ++glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, ++ int path_len); ++ ++void ++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len); ++ ++void ++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len); ++ ++void ++glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd); ++ ++int ++glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, ++ glusterd_svc_t *svc, int flags); ++ ++int ++glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo); ++ ++#endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 69e27cb..937ea30 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -13,9 +13,10 @@ + #include "glusterd.h" + #include "glusterd-utils.h" + #include "glusterd-volgen.h" +-#include "glusterd-svc-mgmt.h" + #include "glusterd-shd-svc.h" ++#include "glusterd-shd-svc-helper.h" + #include "glusterd-svc-helper.h" ++#include "glusterd-store.h" + + #define GD_SHD_PROCESS_NAME "--process-name" + char *shd_svc_name = "glustershd"; +@@ -23,27 +24,145 @@ char *shd_svc_name = "glustershd"; + void + glusterd_shdsvc_build(glusterd_svc_t *svc) + { ++ int ret = -1; ++ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name); ++ if (ret < 0) ++ return; ++ ++ CDS_INIT_LIST_HEAD(&svc->mux_svc); + svc->manager = glusterd_shdsvc_manager; + svc->start = glusterd_shdsvc_start; +- svc->stop = glusterd_svc_stop; ++ svc->stop = glusterd_shdsvc_stop; ++ svc->reconfigure = glusterd_shdsvc_reconfigure; + } + + int +-glusterd_shdsvc_init(glusterd_svc_t *svc) ++glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, ++ glusterd_svc_proc_t *mux_svc) + { +- return glusterd_svc_init(svc, shd_svc_name); ++ int ret = -1; ++ char rundir[PATH_MAX] = { ++ 0, ++ }; ++ char sockpath[PATH_MAX] = { ++ 0, ++ }; ++ char pidfile[PATH_MAX] = { ++ 0, ++ }; ++ char volfile[PATH_MAX] = { ++ 0, ++ }; ++ char logdir[PATH_MAX] = { ++ 0, ++ }; ++ char logfile[PATH_MAX] = { ++ 0, ++ }; ++ char volfileid[256] = {0}; ++ glusterd_svc_t *svc = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *priv = NULL; ++ glusterd_muxsvc_conn_notify_t notify = NULL; ++ xlator_t *this = NULL; ++ char *volfileserver = NULL; ++ int32_t len = 
0;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
++
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++ volinfo = data;
++ GF_VALIDATE_OR_GOTO(this->name, data, out);
++ GF_VALIDATE_OR_GOTO(this->name, mux_svc, out);
++
++ svc = &(volinfo->shd.svc);
++
++ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
++ if (ret < 0)
++ goto out;
++
++ notify = glusterd_muxsvc_common_rpc_notify;
++ glusterd_store_perform_node_state_store(volinfo);
++
++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
++ glusterd_svc_create_rundir(rundir);
++
++ glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir));
++ glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile));
++
++ /* Initialize the connection mgmt */
++ if (mux_conn && mux_svc->rpc) {
++ /* multiplexed svc */
++ svc->conn.frame_timeout = mux_conn->frame_timeout;
++ /* This will be unrefed from glusterd_shd_svcproc_cleanup */
++ svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc);
++ ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s",
++ mux_conn->sockpath);
++ } else {
++ ret = mkdir_p(logdir, 0755, _gf_true);
++ if ((ret == -1) && (EEXIST != errno)) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
++ "Unable to create logdir %s", logdir);
++ goto out;
++ }
++
++ glusterd_svc_build_shd_socket_filepath(volinfo, sockpath,
++ sizeof(sockpath));
++ ret = glusterd_muxsvc_conn_init(&(svc->conn), mux_svc, sockpath, 600,
++ notify);
++ if (ret)
++ goto out;
++ /* This will be unrefed when the last svc is detached from the list */
++ if (!mux_svc->rpc)
++ mux_svc->rpc = rpc_clnt_ref(svc->conn.rpc);
++ }
++
++ /* Initialize the process mgmt */
++ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
++ glusterd_svc_build_shd_volfile_path(volinfo, volfile, PATH_MAX);
++ len = snprintf(volfileid, sizeof(volfileid), "shd/%s", volinfo->volname);
++ if ((len < 0) || (len >= sizeof(volfileid))) {
++ ret = -1;
++ goto out;
++ }
++
++ if (dict_get_strn(this->options, "transport.socket.bind-address",
++ SLEN("transport.socket.bind-address"),
++ &volfileserver) != 0) {
++ volfileserver = "localhost";
++ }
++ ret = glusterd_proc_init(&(svc->proc), shd_svc_name, pidfile, logdir,
++ logfile, volfile, volfileid, volfileserver);
++ if (ret)
++ goto out;
++
++out:
++ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
++ return ret;
+ }
+ 
+-static int
+-glusterd_shdsvc_create_volfile()
++int
++glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
+ {
+ char filepath[PATH_MAX] = {
+ 0,
+ };
++
+ int ret = -1;
+- glusterd_conf_t *conf = THIS->private;
+ dict_t *mod_dict = NULL;
+ 
++ glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX);
++ if (!glusterd_is_shd_compatible_volume(volinfo)) {
++ /* If the volfile exists, delete it. This case happens when we
++ * change from replica/ec to distribute.
++ */
++ (void)glusterd_unlink_file(filepath);
++ ret = 0;
++ goto out;
++ }
+ mod_dict = dict_new();
+ if (!mod_dict)
+ goto out;
+@@ -64,9 +183,7 @@ glusterd_shdsvc_create_volfile()
+ if (ret)
+ goto out;
+ 
+- glusterd_svc_build_volfile_path(shd_svc_name, conf->workdir, filepath,
+- sizeof(filepath));
+- ret = glusterd_create_global_volfile(build_shd_graph, filepath, mod_dict);
++ ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfile");
+@@ -81,26 +198,89 @@ out:
+ return ret;
+ }
+ 
++gf_boolean_t
++glusterd_svcs_shd_compatible_volumes_stopped(glusterd_svc_t *svc)
++{
++ glusterd_svc_proc_t *svc_proc = NULL;
++ glusterd_shdsvc_t *shd = NULL;
++ glusterd_svc_t *temp_svc = NULL;
++ glusterd_volinfo_t *volinfo = NULL;
++ gf_boolean_t comp = _gf_false;
++ glusterd_conf_t *conf = THIS->private;
++
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ svc_proc = svc->svc_proc;
++ if (!svc_proc)
++ goto unlock;
++ cds_list_for_each_entry(temp_svc, &svc_proc->svcs, mux_svc)
++ {
++ /* Get volinfo->shd from svc object */
++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
++ if (!shd) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
++ "Failed to get shd object "
++ "from shd service");
++ goto unlock;
++ }
++
++ /* Get volinfo from shd */
++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
++ if (!volinfo) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++ "Failed to get volinfo "
++ "from shd");
++ goto unlock;
++ }
++ if (!glusterd_is_shd_compatible_volume(volinfo))
++ continue;
++ if (volinfo->status == GLUSTERD_STATUS_STARTED)
++ goto unlock;
++ }
++ comp = _gf_true;
++ }
++unlock:
++ pthread_mutex_unlock(&conf->attach_lock);
++out:
++ return comp;
++}
++
+ int
+ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ {
+- int ret = 0;
++ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ 
+- if (!svc->inited) {
+- ret = glusterd_shdsvc_init(svc);
+- if (ret) {
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
+- "Failed to init shd "
+- "service");
+- goto out;
+- } else {
+- svc->inited = _gf_true;
+- gf_msg_debug(THIS->name, 0, "shd service initialized");
++ volinfo = data;
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
++
++ if (volinfo)
++ glusterd_volinfo_ref(volinfo);
++
++ ret = glusterd_shdsvc_create_volfile(volinfo);
++ if (ret)
++ goto out;
++
++ if (!glusterd_is_shd_compatible_volume(volinfo)) {
++ ret = 0;
++ if (svc->inited) {
++ /* This means glusterd was running for this volume and now
++ * it was converted to a non-shd volume. 
So just stop the shd ++ */ ++ ret = svc->stop(svc, SIGTERM); + } ++ goto out; + } + +- volinfo = data; ++ ret = glusterd_shd_svc_mux_init(volinfo, svc); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, ++ "Failed to init shd service"); ++ goto out; ++ } + + /* If all the volumes are stopped or all shd compatible volumes + * are stopped then stop the service if: +@@ -110,31 +290,26 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + * - volinfo is NULL or + * - volinfo is present and volume is shd compatible + */ +- if (glusterd_are_all_volumes_stopped() || +- glusterd_all_shd_compatible_volumes_stopped()) { +- if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) { +- ret = svc->stop(svc, SIGTERM); +- } +- } else { +- if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) { +- ret = glusterd_shdsvc_create_volfile(); +- if (ret) +- goto out; +- +- ret = svc->stop(svc, SIGTERM); +- if (ret) +- goto out; ++ if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) { ++ /* TODO ++ * Take a lock and detach all svc's to stop the process ++ * also reset the init flag ++ */ ++ ret = svc->stop(svc, SIGTERM); ++ } else if (volinfo) { ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) ++ goto out; + ++ if (volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = svc->start(svc, flags); + if (ret) + goto out; +- +- ret = glusterd_conn_connect(&(svc->conn)); +- if (ret) +- goto out; + } + } + out: ++ if (volinfo) ++ glusterd_volinfo_unref(volinfo); + if (ret) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); +@@ -143,7 +318,7 @@ out: + } + + int +-glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) ++glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) + { + int ret = -1; + char glusterd_uuid_option[PATH_MAX] = {0}; +@@ -178,31 +353,136 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + goto out; + + ret = glusterd_svc_start(svc, flags, cmdline); ++ if (ret) ++ goto out; + ++ ret = glusterd_conn_connect(&(svc->conn)); + out: + if (cmdline) + dict_unref(cmdline); ++ return ret; ++} + ++int ++glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, ++ glusterd_svc_t *svc, int flags) ++{ ++ int ret = -1; ++ glusterd_svc_proc_t *mux_proc = NULL; ++ glusterd_conf_t *conf = NULL; ++ ++ conf = THIS->private; ++ ++ if (!conf || !volinfo || !svc) ++ return -1; ++ glusterd_shd_svcproc_cleanup(&volinfo->shd); ++ mux_proc = glusterd_svcprocess_new(); ++ if (!mux_proc) { ++ return -1; ++ } ++ ret = glusterd_shdsvc_init(volinfo, NULL, mux_proc); ++ if (ret) ++ return -1; ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs); ++ svc->svc_proc = mux_proc; ++ cds_list_del_init(&svc->mux_svc); ++ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ ++ ret = glusterd_new_shd_svc_start(svc, flags); ++ if (!ret) { ++ volinfo->shd.attached = _gf_true; ++ } ++ return ret; ++} ++ ++int ++glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) ++{ ++ int ret = -1; ++ glusterd_shdsvc_t *shd = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *conf = NULL; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ ++ /* Get volinfo->shd from svc object */ ++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); ++ if (!shd) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, ++ "Failed to 
get shd object " ++ "from shd service"); ++ return -1; ++ } ++ ++ /* Get volinfo from shd */ ++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); ++ if (!volinfo) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "Failed to get volinfo from " ++ "from shd"); ++ return -1; ++ } ++ ++ if (volinfo->status != GLUSTERD_STATUS_STARTED) ++ return -1; ++ ++ glusterd_volinfo_ref(volinfo); ++ if (!svc->inited) { ++ ret = glusterd_shd_svc_mux_init(volinfo, svc); ++ if (ret) ++ goto out; ++ } ++ ++ if (shd->attached) { ++ ret = glusterd_attach_svc(svc, volinfo, flags); ++ if (ret) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "Failed to attach shd svc(volume=%s) to pid=%d. Starting" ++ "a new process", ++ volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags); ++ } ++ goto out; ++ } ++ ret = glusterd_new_shd_svc_start(svc, flags); ++ if (!ret) { ++ shd->attached = _gf_true; ++ } ++out: ++ if (volinfo) ++ glusterd_volinfo_unref(volinfo); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; + } + + int +-glusterd_shdsvc_reconfigure() ++glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo) + { + int ret = -1; + xlator_t *this = NULL; +- glusterd_conf_t *priv = NULL; + gf_boolean_t identical = _gf_false; ++ dict_t *mod_dict = NULL; ++ glusterd_svc_t *svc = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + +- priv = this->private; +- GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ if (!volinfo) { ++ /* reconfigure will be called separately*/ ++ ret = 0; ++ goto out; ++ } + +- if (glusterd_all_shd_compatible_volumes_stopped()) ++ glusterd_volinfo_ref(volinfo); ++ svc = &(volinfo->shd.svc); ++ if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) + goto manager; + + /* +@@ -210,8 +490,42 @@ glusterd_shdsvc_reconfigure() + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ +- ret = glusterd_svc_check_volfile_identical(priv->shd_svc.name, +- build_shd_graph, &identical); ++ ++ if (!glusterd_is_shd_compatible_volume(volinfo)) { ++ if (svc->inited) ++ goto manager; ++ ++ /* Nothing to do if not shd compatible */ ++ ret = 0; ++ goto out; ++ } ++ mod_dict = dict_new(); ++ if (!mod_dict) ++ goto out; ++ ++ ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0); ++ if (ret) ++ goto out; ++ ++ ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on"); ++ if (ret) ++ goto out; ++ ++ ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on"); ++ if (ret) ++ goto out; ++ ++ ret = dict_set_int32(mod_dict, "graph-check", 1); ++ if (ret) ++ goto out; ++ ++ ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on"); ++ if (ret) ++ goto out; ++ ++ ret = glusterd_volume_svc_check_volfile_identical( ++ "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile, ++ &identical); + if (ret) + goto out; + +@@ -226,8 +540,9 @@ glusterd_shdsvc_reconfigure() + * changed, then inform the xlator to reconfigure the options. + */ + identical = _gf_false; /* RESET the FLAG */ +- ret = glusterd_svc_check_topology_identical(priv->shd_svc.name, +- build_shd_graph, &identical); ++ ret = glusterd_volume_svc_check_topology_identical( ++ "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile, ++ &identical); + if (ret) + goto out; + +@@ -235,7 +550,7 @@ glusterd_shdsvc_reconfigure() + * options to shd volfile, so that shd will be reconfigured. 
+ */ + if (identical) { +- ret = glusterd_shdsvc_create_volfile(); ++ ret = glusterd_shdsvc_create_volfile(volinfo); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(THIS); + } +@@ -243,12 +558,129 @@ glusterd_shdsvc_reconfigure() + } + manager: + /* +- * shd volfile's topology has been changed. shd server needs +- * to be RESTARTED to ACT on the changed volfile. ++ * shd volfile's topology has been changed. volfile needs ++ * to be RECONFIGURED to ACT on the changed volfile. + */ +- ret = priv->shd_svc.manager(&(priv->shd_svc), NULL, PROC_START_NO_WAIT); ++ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + + out: ++ if (volinfo) ++ glusterd_volinfo_unref(volinfo); ++ if (mod_dict) ++ dict_unref(mod_dict); + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; + } ++ ++int ++glusterd_shdsvc_restart() ++{ ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_volinfo_t *tmp = NULL; ++ int ret = -1; ++ xlator_t *this = THIS; ++ glusterd_conf_t *conf = NULL; ++ glusterd_svc_t *svc = NULL; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ ++ pthread_mutex_lock(&conf->volume_lock); ++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) ++ { ++ glusterd_volinfo_ref(volinfo); ++ pthread_mutex_unlock(&conf->volume_lock); ++ /* Start per volume shd svc */ ++ if (volinfo->status == GLUSTERD_STATUS_STARTED) { ++ svc = &(volinfo->shd.svc); ++ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHD_START_FAIL, ++ "Couldn't start shd for " ++ "vol: %s on restart", ++ volinfo->volname); ++ gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s", ++ volinfo->volname, svc->name); ++ glusterd_volinfo_unref(volinfo); ++ goto out; ++ } ++ } ++ glusterd_volinfo_unref(volinfo); ++ pthread_mutex_lock(&conf->volume_lock); ++ } ++ pthread_mutex_unlock(&conf->volume_lock); ++out: ++ return ret; ++} ++ ++int ++glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) ++{ ++ int ret = -1; ++ glusterd_svc_proc_t *svc_proc = NULL; ++ glusterd_shdsvc_t *shd = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ gf_boolean_t empty = _gf_false; ++ glusterd_conf_t *conf = NULL; ++ int pid = -1; ++ ++ conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ svc_proc = svc->svc_proc; ++ GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out); ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ ++ /* Get volinfo->shd from svc object */ ++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); ++ if (!shd) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, ++ "Failed to get shd object " ++ "from shd service"); ++ return -1; ++ } ++ ++ /* Get volinfo from shd */ ++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); ++ if (!volinfo) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "Failed to get volinfo from " ++ "from shd"); ++ return -1; ++ } ++ ++ glusterd_volinfo_ref(volinfo); ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ gf_is_service_running(svc->proc.pidfile, &pid); ++ cds_list_del_init(&svc->mux_svc); ++ empty = cds_list_empty(&svc_proc->svcs); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ if (empty) { ++ /* Unref will happen when destroying the connection */ ++ glusterd_volinfo_ref(volinfo); ++ svc_proc->data = volinfo; ++ ret = glusterd_svc_stop(svc, sig); ++ } ++ if (!empty && pid != -1) { ++ ret = glusterd_detach_svc(svc, volinfo, sig); ++ if (ret) ++ 
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, ++ "shd service is failed to detach volume %s from pid %d", ++ volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ else ++ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS, ++ "Shd service is detached for volume %s from pid %d", ++ volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ } ++ svc->online = _gf_false; ++ (void)glusterd_unlink_file((char *)svc->proc.pidfile); ++ glusterd_shd_svcproc_cleanup(shd); ++ ret = 0; ++ glusterd_volinfo_unref(volinfo); ++out: ++ gf_msg_debug(THIS->name, 0, "Returning %d", ret); ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h +index 775a9d4..55b409f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h +@@ -12,12 +12,20 @@ + #define _GLUSTERD_SHD_SVC_H_ + + #include "glusterd-svc-mgmt.h" ++#include "glusterd.h" ++ ++typedef struct glusterd_shdsvc_ glusterd_shdsvc_t; ++struct glusterd_shdsvc_ { ++ glusterd_svc_t svc; ++ gf_boolean_t attached; ++}; + + void + glusterd_shdsvc_build(glusterd_svc_t *svc); + + int +-glusterd_shdsvc_init(glusterd_svc_t *svc); ++glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, ++ glusterd_svc_proc_t *svc_proc); + + int + glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags); +@@ -27,4 +35,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags); + + int + glusterd_shdsvc_reconfigure(); ++ ++int ++glusterd_shdsvc_restart(); ++ ++int ++glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig); ++ + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 54a7bd1..943b1c6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -748,6 +748,16 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv) + } + } + ++ if (glusterd_is_shd_compatible_volume(volinfo)) { ++ svc = &(volinfo->shd.svc); ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, ++ "Failed " ++ "to stop shd daemon service"); ++ } ++ } ++ + if (glusterd_is_gfproxyd_enabled(volinfo)) { + svc = &(volinfo->gfproxyd.svc); + ret = svc->stop(svc, SIGTERM); +@@ -775,7 +785,7 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv) + } + + /*Reconfigure all daemon services upon peer detach*/ +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed to reconfigure all daemon services."); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +index 56bab07..1da4076 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +@@ -366,6 +366,7 @@ int + glusterd_snapdsvc_restart() + { + glusterd_volinfo_t *volinfo = NULL; ++ glusterd_volinfo_t *tmp = NULL; + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; +@@ -376,7 +377,7 @@ glusterd_snapdsvc_restart() + conf = this->private; + GF_ASSERT(conf); + +- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) ++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) + { + /* Start per volume snapd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c +index f5ecde7..69d4cf4 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-statedump.c ++++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c +@@ -202,9 +202,6 @@ glusterd_dump_priv(xlator_t *this) + gf_proc_dump_build_key(key, "glusterd", "ping-timeout"); + gf_proc_dump_write(key, "%d", priv->ping_timeout); + +- gf_proc_dump_build_key(key, "glusterd", "shd.online"); +- gf_proc_dump_write(key, "%d", priv->shd_svc.online); +- + gf_proc_dump_build_key(key, "glusterd", "nfs.online"); + gf_proc_dump_write(key, "%d", priv->nfs_svc.online); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index ca19a75..e42703c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -7,6 +7,7 @@ + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ ++#include + + #include + #include +@@ -20,12 +21,14 @@ + #include "glusterd-bitd-svc.h" + #include "glusterd-tierd-svc.h" + #include "glusterd-tierd-svc-helper.h" ++#include "glusterd-shd-svc-helper.h" + #include "glusterd-scrub-svc.h" + #include "glusterd-svc-helper.h" + #include ++#include "glusterd-snapshot-utils.h" + + int +-glusterd_svcs_reconfigure() ++glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo) + { + int ret = 0; + xlator_t *this = THIS; +@@ -43,9 +46,11 @@ glusterd_svcs_reconfigure() + goto out; + + svc_name = "self-heald"; +- ret = glusterd_shdsvc_reconfigure(); +- if (ret) +- goto out; ++ if (volinfo) { ++ ret = glusterd_shdsvc_reconfigure(volinfo); ++ if (ret) ++ goto out; ++ } + + if (conf->op_version == GD_OP_VERSION_MIN) + goto out; +@@ -69,7 +74,7 @@ out: + } + + int +-glusterd_svcs_stop() ++glusterd_svcs_stop(glusterd_volinfo_t *volinfo) + { + int ret = 0; + xlator_t *this = NULL; +@@ -85,14 +90,16 @@ glusterd_svcs_stop() + if (ret) + goto out; + +- ret = glusterd_svc_stop(&(priv->shd_svc), SIGTERM); +- if (ret) +- goto out; +- + ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM); + if (ret) + goto out; + ++ if (volinfo) { ++ ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT); ++ if (ret) ++ goto out; ++ } ++ + ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM); + if (ret) + goto out; +@@ -121,12 +128,6 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo) + if (ret) + goto out; + +- ret = conf->shd_svc.manager(&(conf->shd_svc), volinfo, PROC_START_NO_WAIT); +- if (ret == -EINVAL) +- ret = 0; +- if (ret) +- goto out; +- + if (conf->op_version == GD_OP_VERSION_MIN) + goto out; + +@@ -143,6 +144,15 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo) + if (ret) + goto out; + ++ if (volinfo) { ++ ret = volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo, ++ PROC_START_NO_WAIT); ++ if (ret == -EINVAL) ++ ret = 0; ++ if (ret) ++ goto out; ++ } ++ + ret = conf->scrub_svc.manager(&(conf->scrub_svc), NULL, PROC_START_NO_WAIT); + if (ret == -EINVAL) + ret = 0; +@@ -269,3 +279,678 @@ out: + GF_FREE(tmpvol); + return ret; + } ++ ++int ++glusterd_volume_svc_check_volfile_identical( ++ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo, ++ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical) ++{ ++ char orgvol[PATH_MAX] = { ++ 0, ++ }; ++ char *tmpvol = NULL; ++ xlator_t *this = NULL; ++ int ret = -1; ++ int need_unlink = 0; ++ int tmp_fd = -1; ++ ++ this = THIS; ++ ++ GF_VALIDATE_OR_GOTO(this->name, this, out); ++ GF_VALIDATE_OR_GOTO(this->name, identical, out); ++ ++ /* This builds volfile for volume level dameons */ ++ 
glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol, ++ sizeof(orgvol)); ++ ++ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); ++ if (ret < 0) { ++ goto out; ++ } ++ ++ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ tmp_fd = mkstemp(tmpvol); ++ if (tmp_fd < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, ++ "Unable to create temp file" ++ " %s:(%s)", ++ tmpvol, strerror(errno)); ++ ret = -1; ++ goto out; ++ } ++ ++ need_unlink = 1; ++ ++ ret = builder(volinfo, tmpvol, mode_dict); ++ if (ret) ++ goto out; ++ ++ ret = glusterd_check_files_identical(orgvol, tmpvol, identical); ++out: ++ if (need_unlink) ++ sys_unlink(tmpvol); ++ ++ if (tmpvol != NULL) ++ GF_FREE(tmpvol); ++ ++ if (tmp_fd >= 0) ++ sys_close(tmp_fd); ++ ++ return ret; ++} ++ ++int ++glusterd_volume_svc_check_topology_identical( ++ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo, ++ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical) ++{ ++ char orgvol[PATH_MAX] = { ++ 0, ++ }; ++ char *tmpvol = NULL; ++ glusterd_conf_t *conf = NULL; ++ xlator_t *this = THIS; ++ int ret = -1; ++ int tmpclean = 0; ++ int tmpfd = -1; ++ ++ if ((!identical) || (!this) || (!this->private)) ++ goto out; ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ ++ /* This builds volfile for volume level dameons */ ++ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol, ++ sizeof(orgvol)); ++ /* Create the temporary volfile */ ++ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); ++ if (ret < 0) { ++ goto out; ++ } ++ ++ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ tmpfd = mkstemp(tmpvol); ++ if (tmpfd < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, ++ "Unable to create temp file" ++ " %s:(%s)", ++ tmpvol, strerror(errno)); ++ ret = -1; ++ goto out; ++ } ++ ++ tmpclean = 1; /* SET the flag to unlink() tmpfile */ ++ ++ ret = builder(volinfo, tmpvol, mode_dict); ++ if (ret) ++ goto out; ++ ++ /* Compare the topology of volfiles */ ++ ret = glusterd_check_topology_identical(orgvol, tmpvol, identical); ++out: ++ if (tmpfd >= 0) ++ sys_close(tmpfd); ++ if (tmpclean) ++ sys_unlink(tmpvol); ++ if (tmpvol != NULL) ++ GF_FREE(tmpvol); ++ return ret; ++} ++ ++void * ++__gf_find_compatible_svc(gd_node_type daemon) ++{ ++ glusterd_svc_proc_t *svc_proc = NULL; ++ glusterd_svc_proc_t *return_proc = NULL; ++ glusterd_svc_t *parent_svc = NULL; ++ struct cds_list_head *svc_procs = NULL; ++ glusterd_conf_t *conf = NULL; ++ int pid = -1; ++ ++ conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ ++ if (daemon == GD_NODE_SHD) { ++ svc_procs = &conf->shd_procs; ++ if (!svc_procs) ++ goto out; ++ } ++ ++ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) ++ { ++ parent_svc = cds_list_entry(svc_proc->svcs.next, glusterd_svc_t, ++ mux_svc); ++ if (!return_proc) ++ return_proc = svc_proc; ++ ++ /* If there is an already running shd daemons, select it. Otehrwise ++ * select the first one. ++ */ ++ if (parent_svc && gf_is_service_running(parent_svc->proc.pidfile, &pid)) ++ return (void *)svc_proc; ++ /* ++ * Logic to select one process goes here. Currently there is only one ++ * shd_proc. 
So selecting the first one; ++ */ ++ } ++out: ++ return return_proc; ++} ++ ++glusterd_svc_proc_t * ++glusterd_svcprocess_new() ++{ ++ glusterd_svc_proc_t *new_svcprocess = NULL; ++ ++ new_svcprocess = GF_CALLOC(1, sizeof(*new_svcprocess), ++ gf_gld_mt_glusterd_svc_proc_t); ++ ++ if (!new_svcprocess) ++ return NULL; ++ ++ CDS_INIT_LIST_HEAD(&new_svcprocess->svc_proc_list); ++ CDS_INIT_LIST_HEAD(&new_svcprocess->svcs); ++ new_svcprocess->notify = glusterd_muxsvc_common_rpc_notify; ++ return new_svcprocess; ++} ++ ++int ++glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) ++{ ++ int ret = -1; ++ glusterd_svc_proc_t *mux_proc = NULL; ++ glusterd_conn_t *mux_conn = NULL; ++ glusterd_conf_t *conf = NULL; ++ glusterd_svc_t *parent_svc = NULL; ++ int pid = -1; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); ++ conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ if (!svc->inited) { ++ if (gf_is_service_running(svc->proc.pidfile, &pid)) { ++ /* Just connect is required, but we don't know what happens ++ * during the disconnect. So better to reattach. ++ */ ++ mux_proc = __gf_find_compatible_svc_from_pid(GD_NODE_SHD, pid); ++ } ++ ++ if (!mux_proc) { ++ if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) { ++ /* stale pid file, unlink it. */ ++ kill(pid, SIGTERM); ++ sys_unlink(svc->proc.pidfile); ++ } ++ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD); ++ } ++ if (mux_proc) { ++ /* Take first entry from the process */ ++ parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t, ++ mux_svc); ++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); ++ mux_conn = &parent_svc->conn; ++ if (volinfo) ++ volinfo->shd.attached = _gf_true; ++ } else { ++ mux_proc = glusterd_svcprocess_new(); ++ if (!mux_proc) { ++ ret = -1; ++ goto unlock; ++ } ++ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs); ++ } ++ svc->svc_proc = mux_proc; ++ cds_list_del_init(&svc->mux_svc); ++ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs); ++ ret = glusterd_shdsvc_init(volinfo, mux_conn, mux_proc); ++ if (ret) { ++ pthread_mutex_unlock(&conf->attach_lock); ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, ++ "Failed to init shd " ++ "service"); ++ goto out; ++ } ++ gf_msg_debug(THIS->name, 0, "shd service initialized"); ++ svc->inited = _gf_true; ++ } ++ ret = 0; ++ } ++unlock: ++ pthread_mutex_unlock(&conf->attach_lock); ++out: ++ return ret; ++} ++ ++void * ++__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid) ++{ ++ glusterd_svc_proc_t *svc_proc = NULL; ++ struct cds_list_head *svc_procs = NULL; ++ glusterd_svc_t *svc = NULL; ++ pid_t mux_pid = -1; ++ glusterd_conf_t *conf = NULL; ++ ++ conf = THIS->private; ++ if (!conf) ++ return NULL; ++ ++ if (daemon == GD_NODE_SHD) { ++ svc_procs = &conf->shd_procs; ++ if (!svc_proc) ++ return NULL; ++ } /* Can be moved to switch when mux is implemented for other daemon; */ ++ ++ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) ++ { ++ cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc) ++ { ++ if (gf_is_service_running(svc->proc.pidfile, &mux_pid)) { ++ if (mux_pid == pid) { ++ /*TODO ++ * inefficient loop, but at the moment, there is only ++ * one shd. 
++ */ ++ return svc_proc; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++static int32_t ++my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) ++{ ++ call_frame_t *frame = v_frame; ++ xlator_t *this = NULL; ++ glusterd_conf_t *conf = NULL; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", frame, out); ++ this = frame->this; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ ++ GF_ATOMIC_DEC(conf->blockers); ++ ++ STACK_DESTROY(frame->root); ++out: ++ return 0; ++} ++ ++static int32_t ++glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, ++ void *v_frame) ++{ ++ call_frame_t *frame = v_frame; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_shdsvc_t *shd = NULL; ++ glusterd_svc_t *svc = frame->cookie; ++ glusterd_svc_t *parent_svc = NULL; ++ glusterd_svc_proc_t *mux_proc = NULL; ++ glusterd_conf_t *conf = NULL; ++ int *flag = (int *)frame->local; ++ xlator_t *this = THIS; ++ int pid = -1; ++ int ret = -1; ++ gf_getspec_rsp rsp = { ++ 0, ++ }; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ GF_VALIDATE_OR_GOTO("glusterd", frame, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ ++ frame->local = NULL; ++ frame->cookie = NULL; ++ ++ if (!strcmp(svc->name, "glustershd")) { ++ /* Get volinfo->shd from svc object */ ++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); ++ if (!shd) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, ++ "Failed to get shd object " ++ "from shd service"); ++ goto out; ++ } ++ ++ /* Get volinfo from shd */ ++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); ++ if (!volinfo) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "Failed to get volinfo from " ++ "from shd"); ++ goto out; ++ } ++ } ++ ++ if (!iov) { ++ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, ++ "iov is NULL"); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); ++ if (ret < 0) { ++ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, ++ "XDR decoding error"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (rsp.op_ret == 0) { ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ if (!strcmp(svc->name, "glustershd")) { ++ mux_proc = svc->svc_proc; ++ if (mux_proc && ++ !gf_is_service_running(svc->proc.pidfile, &pid)) { ++ /* ++ * When svc's are restarting, there is a chance that the ++ * attached svc might not have updated it's pid. Because ++ * it was at connection stage. So in that case, we need ++ * to retry the pid file copy. ++ */ ++ parent_svc = cds_list_entry(mux_proc->svcs.next, ++ glusterd_svc_t, mux_svc); ++ if (parent_svc) ++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); ++ } ++ } ++ svc->online = _gf_true; ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "svc %s of volume %s attached successfully to pid %d", svc->name, ++ volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "svc %s of volume %s failed to " ++ "attach to pid %d. 
Starting a new process", ++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ if (!strcmp(svc->name, "glustershd")) { ++ glusterd_recover_shd_attach_failure(volinfo, svc, *flag); ++ } ++ } ++out: ++ if (flag) { ++ GF_FREE(flag); ++ } ++ GF_ATOMIC_DEC(conf->blockers); ++ STACK_DESTROY(frame->root); ++ return 0; ++} ++ ++extern size_t ++build_volfile_path(char *volume_id, char *path, size_t path_len, ++ char *trusted_str); ++ ++int ++__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, ++ struct rpc_clnt *rpc, char *volfile_id, ++ int op) ++{ ++ int ret = -1; ++ struct iobuf *iobuf = NULL; ++ struct iobref *iobref = NULL; ++ struct iovec iov = { ++ 0, ++ }; ++ char path[PATH_MAX] = { ++ '\0', ++ }; ++ struct stat stbuf = { ++ 0, ++ }; ++ int32_t spec_fd = -1; ++ size_t file_len = -1; ++ char *volfile_content = NULL; ++ ssize_t req_size = 0; ++ call_frame_t *frame = NULL; ++ gd1_mgmt_brick_op_req brick_req; ++ void *req = &brick_req; ++ void *errlbl = &&err; ++ struct rpc_clnt_connection *conn; ++ xlator_t *this = THIS; ++ glusterd_conf_t *conf = THIS->private; ++ extern struct rpc_clnt_program gd_brick_prog; ++ fop_cbk_fn_t cbkfn = my_callback; ++ ++ if (!rpc) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PARAM_NULL, ++ "called with null rpc"); ++ return -1; ++ } ++ ++ conn = &rpc->conn; ++ if (!conn->connected || conn->disconnected) { ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED, ++ "not connected yet"); ++ return -1; ++ } ++ ++ brick_req.op = op; ++ brick_req.name = volfile_id; ++ brick_req.input.input_val = NULL; ++ brick_req.input.input_len = 0; ++ ++ frame = create_frame(this, this->ctx->pool); ++ if (!frame) { ++ goto *errlbl; ++ } ++ ++ if (op == GLUSTERD_SVC_ATTACH) { ++ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL); ++ ++ ret = sys_stat(path, &stbuf); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "Unable to stat %s (%s)", path, strerror(errno)); ++ ret = -EINVAL; ++ goto *errlbl; ++ } ++ ++ file_len = stbuf.st_size; ++ volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char); ++ if (!volfile_content) { ++ ret = -ENOMEM; ++ goto *errlbl; ++ } ++ spec_fd = open(path, O_RDONLY); ++ if (spec_fd < 0) { ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "failed to read volfile %s", path); ++ ret = -EIO; ++ goto *errlbl; ++ } ++ ret = sys_read(spec_fd, volfile_content, file_len); ++ if (ret == file_len) { ++ brick_req.input.input_val = volfile_content; ++ brick_req.input.input_len = file_len; ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "read failed on path %s. File size=%" GF_PRI_SIZET ++ "read size=%d", ++ path, file_len, ret); ++ ret = -EIO; ++ goto *errlbl; ++ } ++ ++ frame->cookie = svc; ++ frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int); ++ *((int *)frame->local) = flags; ++ cbkfn = glusterd_svc_attach_cbk; ++ } ++ ++ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); ++ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); ++ if (!iobuf) { ++ goto *errlbl; ++ } ++ errlbl = &&maybe_free_iobuf; ++ ++ iov.iov_base = iobuf->ptr; ++ iov.iov_len = iobuf_pagesize(iobuf); ++ ++ iobref = iobref_new(); ++ if (!iobref) { ++ goto *errlbl; ++ } ++ errlbl = &&free_iobref; ++ ++ iobref_add(iobref, iobuf); ++ /* ++ * Drop our reference to the iobuf. The iobref should already have ++ * one after iobref_add, so when we unref that we'll free the iobuf as ++ * well. This allows us to pass just the iobref as frame->local. 
++ */ ++ iobuf_unref(iobuf); ++ /* Set the pointer to null so we don't free it on a later error. */ ++ iobuf = NULL; ++ ++ /* Create the xdr payload */ ++ ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req); ++ if (ret == -1) { ++ goto *errlbl; ++ } ++ iov.iov_len = ret; ++ ++ /* Send the msg */ ++ GF_ATOMIC_INC(conf->blockers); ++ ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, ++ iobref, frame, NULL, 0, NULL, 0, NULL); ++ GF_FREE(volfile_content); ++ if (spec_fd >= 0) ++ sys_close(spec_fd); ++ return ret; ++ ++free_iobref: ++ iobref_unref(iobref); ++maybe_free_iobuf: ++ if (iobuf) { ++ iobuf_unref(iobuf); ++ } ++err: ++ GF_FREE(volfile_content); ++ if (spec_fd >= 0) ++ sys_close(spec_fd); ++ if (frame) ++ STACK_DESTROY(frame->root); ++ return -1; ++} ++ ++int ++glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags) ++{ ++ glusterd_conf_t *conf = THIS->private; ++ int ret = -1; ++ int tries; ++ rpc_clnt_t *rpc = NULL; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); ++ ++ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_ATTACH_INFO, ++ "adding svc %s (volume=%s) to existing " ++ "process with pid %d", ++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ ++ rpc = rpc_clnt_ref(svc->conn.rpc); ++ for (tries = 15; tries > 0; --tries) { ++ if (rpc) { ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ ret = __glusterd_send_svc_configure_req( ++ svc, flags, rpc, svc->proc.volfileid, GLUSTERD_SVC_ATTACH); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ if (!ret) { ++ volinfo->shd.attached = _gf_true; ++ goto out; ++ } ++ } ++ /* ++ * It might not actually be safe to manipulate the lock ++ * like this, but if we don't then the connection can ++ * never actually complete and retries are useless. ++ * Unfortunately, all of the alternatives (e.g. doing ++ * all of this in a separate thread) are much more ++ * complicated and risky. ++ * TBD: see if there's a better way ++ */ ++ synclock_unlock(&conf->big_lock); ++ sleep(1); ++ synclock_lock(&conf->big_lock); ++ } ++ ret = -1; ++ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "attach failed for %s(volume=%s)", svc->name, volinfo->volname); ++out: ++ if (rpc) ++ rpc_clnt_unref(rpc); ++ return ret; ++} ++ ++int ++glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig) ++{ ++ glusterd_conf_t *conf = THIS->private; ++ int ret = -1; ++ int tries; ++ rpc_clnt_t *rpc = NULL; ++ ++ GF_VALIDATE_OR_GOTO(THIS->name, conf, out); ++ GF_VALIDATE_OR_GOTO(THIS->name, svc, out); ++ GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); ++ ++ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DETACH_INFO, ++ "removing svc %s (volume=%s) from existing " ++ "process with pid %d", ++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ ++ rpc = rpc_clnt_ref(svc->conn.rpc); ++ for (tries = 15; tries > 0; --tries) { ++ if (rpc) { ++ /*For detach there is no flags, and we are not using sig.*/ ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ ret = __glusterd_send_svc_configure_req(svc, 0, svc->conn.rpc, ++ svc->proc.volfileid, ++ GLUSTERD_SVC_DETACH); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ if (!ret) { ++ goto out; ++ } ++ } ++ /* ++ * It might not actually be safe to manipulate the lock ++ * like this, but if we don't then the connection can ++ * never actually complete and retries are useless. 
++ * Unfortunately, all of the alternatives (e.g. doing ++ * all of this in a separate thread) are much more ++ * complicated and risky. ++ * TBD: see if there's a better way ++ */ ++ synclock_unlock(&conf->big_lock); ++ sleep(1); ++ synclock_lock(&conf->big_lock); ++ } ++ ret = -1; ++ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_DETACH_FAIL, ++ "detach failed for %s(volume=%s)", svc->name, volinfo->volname); ++out: ++ if (rpc) ++ rpc_clnt_unref(rpc); ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h +index cc98e78..5def246 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h +@@ -16,10 +16,10 @@ + #include "glusterd-volgen.h" + + int +-glusterd_svcs_reconfigure(); ++glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo); + + int +-glusterd_svcs_stop(); ++glusterd_svcs_stop(glusterd_volinfo_t *vol); + + int + glusterd_svcs_manager(glusterd_volinfo_t *volinfo); +@@ -41,5 +41,41 @@ int + glusterd_svc_check_tier_topology_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical); ++int ++glusterd_volume_svc_check_volfile_identical(char *svc_name, dict_t *mode_dict, ++ glusterd_volinfo_t *volinfo, ++ glusterd_vol_graph_builder_t, ++ gf_boolean_t *identical); ++int ++glusterd_volume_svc_check_topology_identical(char *svc_name, dict_t *mode_dict, ++ glusterd_volinfo_t *volinfo, ++ glusterd_vol_graph_builder_t, ++ gf_boolean_t *identical); ++void ++glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol, ++ char *volfile, size_t len); ++void * ++__gf_find_compatible_svc(gd_node_type daemon); ++ ++glusterd_svc_proc_t * ++glusterd_svcprocess_new(); ++ ++int ++glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc); ++ ++void * ++__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid); ++ ++int ++glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, ++ int flags); ++ ++int ++glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig); ++ ++int ++__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flag, ++ struct rpc_clnt *rpc, char *volfile_id, ++ int op); + + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +index 4cd4cea..f32dafc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +@@ -18,6 +18,7 @@ + #include "glusterd-conn-mgmt.h" + #include "glusterd-messages.h" + #include ++#include "glusterd-shd-svc-helper.h" + + int + glusterd_svc_create_rundir(char *rundir) +@@ -167,68 +168,75 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) + GF_ASSERT(this); + + priv = this->private; +- GF_ASSERT(priv); ++ GF_VALIDATE_OR_GOTO("glusterd", priv, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ ++ pthread_mutex_lock(&priv->attach_lock); ++ { ++ if (glusterd_proc_is_running(&(svc->proc))) { ++ ret = 0; ++ goto unlock; ++ } + +- if (glusterd_proc_is_running(&(svc->proc))) { +- ret = 0; +- goto out; +- } ++ ret = sys_access(svc->proc.volfile, F_OK); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND, ++ "Volfile %s is not present", svc->proc.volfile); ++ goto unlock; ++ } + +- ret = sys_access(svc->proc.volfile, F_OK); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND, +- "Volfile %s is not present", svc->proc.volfile); +- goto out; +- } ++ 
runinit(&runner); + +- runinit(&runner); ++ if (this->ctx->cmd_args.valgrind) { ++ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", ++ svc->proc.logfile, svc->name); ++ if ((len < 0) || (len >= PATH_MAX)) { ++ ret = -1; ++ goto unlock; ++ } + +- if (this->ctx->cmd_args.valgrind) { +- len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", +- svc->proc.logfile, svc->name); +- if ((len < 0) || (len >= PATH_MAX)) { +- ret = -1; +- goto out; ++ runner_add_args(&runner, "valgrind", "--leak-check=full", ++ "--trace-children=yes", "--track-origins=yes", ++ NULL); ++ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); + } + +- runner_add_args(&runner, "valgrind", "--leak-check=full", +- "--trace-children=yes", "--track-origins=yes", NULL); +- runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); +- } +- +- runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", +- svc->proc.volfileserver, "--volfile-id", +- svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", +- svc->proc.logfile, "-S", svc->conn.sockpath, NULL); ++ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", ++ svc->proc.volfileserver, "--volfile-id", ++ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", ++ svc->proc.logfile, "-S", svc->conn.sockpath, NULL); + +- if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, +- SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), +- &localtime_logging) == 0) { +- if (strcmp(localtime_logging, "enable") == 0) +- runner_add_arg(&runner, "--localtime-logging"); +- } +- if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY, +- SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), &log_level) == 0) { +- snprintf(daemon_log_level, 30, "--log-level=%s", log_level); +- runner_add_arg(&runner, daemon_log_level); +- } ++ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, ++ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), ++ &localtime_logging) == 0) { ++ if (strcmp(localtime_logging, "enable") == 0) ++ runner_add_arg(&runner, "--localtime-logging"); ++ } ++ if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY, ++ SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), ++ &log_level) == 0) { ++ snprintf(daemon_log_level, 30, "--log-level=%s", log_level); ++ runner_add_arg(&runner, daemon_log_level); ++ } + +- if (cmdline) +- dict_foreach(cmdline, svc_add_args, (void *)&runner); ++ if (cmdline) ++ dict_foreach(cmdline, svc_add_args, (void *)&runner); + +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS, +- "Starting %s service", svc->name); ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS, ++ "Starting %s service", svc->name); + +- if (flags == PROC_START_NO_WAIT) { +- ret = runner_run_nowait(&runner); +- } else { +- synclock_unlock(&priv->big_lock); +- { +- ret = runner_run(&runner); ++ if (flags == PROC_START_NO_WAIT) { ++ ret = runner_run_nowait(&runner); ++ } else { ++ synclock_unlock(&priv->big_lock); ++ { ++ ret = runner_run(&runner); ++ } ++ synclock_lock(&priv->big_lock); + } +- synclock_lock(&priv->big_lock); + } +- ++unlock: ++ pthread_mutex_unlock(&priv->attach_lock); + out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + +@@ -281,7 +289,8 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile, + + glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir)); + +- if (!strcmp(server, "quotad")) /*quotad has different volfile name*/ ++ if (!strcmp(server, "quotad")) ++ /*quotad has different volfile name*/ + snprintf(volfile, len, "%s/%s.vol", dir, server); + else + snprintf(volfile, len, "%s/%s-server.vol", dir, server); +@@ 
-366,3 +375,138 @@ glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event)
+
+ return ret;
+ }
++
++void
++glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
++ char *volfile, size_t len)
++{
++ GF_ASSERT(len == PATH_MAX);
++
++ if (!strcmp(server, "glustershd")) {
++ glusterd_svc_build_shd_volfile_path(vol, volfile, len);
++ }
++}
++
++int
++glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *mux_proc,
++ rpc_clnt_event_t event)
++{
++ int ret = 0;
++ glusterd_svc_t *svc = NULL;
++ glusterd_svc_t *tmp = NULL;
++ xlator_t *this = NULL;
++ gf_boolean_t need_logging = _gf_false;
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ if (!mux_proc) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
++ "Failed to get the svc proc data");
++ return -1;
++ }
++
++ /* Currently this function is used only for the shd svc; if it is
++ * used for another svc, change the glustershd references. We can get
++ * the svc name from any of the attached svcs.
++ */
++ switch (event) {
++ case RPC_CLNT_CONNECT:
++ gf_msg_debug(this->name, 0,
++ "glustershd has connected with glusterd.");
++ gf_event(EVENT_SVC_CONNECTED, "svc_name=glustershd");
++ cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
++ {
++ if (svc->online)
++ continue;
++ svc->online = _gf_true;
++ }
++ break;
++
++ case RPC_CLNT_DISCONNECT:
++ cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
++ {
++ if (svc->online) {
++ if (!need_logging)
++ need_logging = _gf_true;
++ svc->online = _gf_false;
++ }
++ }
++ if (need_logging) {
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED,
++ "glustershd has disconnected from glusterd.");
++ gf_event(EVENT_SVC_DISCONNECTED, "svc_name=glustershd");
++ }
++ break;
++
++ default:
++ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
++ break;
++ }
++
++ return ret;
++}
++
++int
++glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
++ char *sockpath, int frame_timeout,
++ glusterd_muxsvc_conn_notify_t notify)
++{
++ int ret = -1;
++ dict_t *options = NULL;
++ struct rpc_clnt *rpc = NULL;
++ xlator_t *this = THIS;
++ glusterd_svc_t *svc = NULL;
++
++ options = dict_new();
++ if (!this || !options)
++ goto out;
++
++ svc = cds_list_entry(conn, glusterd_svc_t, conn);
++ if (!svc) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
++ "Failed to get the service");
++ goto out;
++ }
++
++ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout);
++ if (ret)
++ goto out;
++
++ ret = dict_set_int32n(options, "transport.socket.ignore-enoent",
++ SLEN("transport.socket.ignore-enoent"), 1);
++ if (ret)
++ goto out;
++
++ /* @options is free'd by rpc_transport when destroyed */
++ rpc = rpc_clnt_new(options, this, (char *)svc->name, 16);
++ if (!rpc) {
++ ret = -1;
++ goto out;
++ }
++
++ ret = rpc_clnt_register_notify(rpc, glusterd_muxsvc_conn_common_notify,
++ mux_proc);
++ if (ret)
++ goto out;
++
++ ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
++ if (ret < 0)
++ goto out;
++ else
++ ret = 0;
++
++ conn->frame_timeout = frame_timeout;
++ conn->rpc = rpc;
++ mux_proc->notify = notify;
++out:
++ if (options)
++ dict_unref(options);
++ if (ret) {
++ if (rpc) {
++ rpc_clnt_unref(rpc);
++ rpc = NULL;
++ }
++ }
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+index c850bfd..fbc5225 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
++++ 
b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +@@ -13,9 +13,12 @@ + + #include "glusterd-proc-mgmt.h" + #include "glusterd-conn-mgmt.h" ++#include "glusterd-rcu.h" + + struct glusterd_svc_; ++ + typedef struct glusterd_svc_ glusterd_svc_t; ++typedef struct glusterd_svc_proc_ glusterd_svc_proc_t; + + typedef void (*glusterd_svc_build_t)(glusterd_svc_t *svc); + +@@ -25,6 +28,17 @@ typedef int (*glusterd_svc_start_t)(glusterd_svc_t *svc, int flags); + typedef int (*glusterd_svc_stop_t)(glusterd_svc_t *svc, int sig); + typedef int (*glusterd_svc_reconfigure_t)(void *data); + ++typedef int (*glusterd_muxsvc_conn_notify_t)(glusterd_svc_proc_t *mux_proc, ++ rpc_clnt_event_t event); ++ ++struct glusterd_svc_proc_ { ++ struct cds_list_head svc_proc_list; ++ struct cds_list_head svcs; ++ glusterd_muxsvc_conn_notify_t notify; ++ rpc_clnt_t *rpc; ++ void *data; ++}; ++ + struct glusterd_svc_ { + char name[NAME_MAX]; + glusterd_conn_t conn; +@@ -35,6 +49,8 @@ struct glusterd_svc_ { + gf_boolean_t online; + gf_boolean_t inited; + glusterd_svc_reconfigure_t reconfigure; ++ glusterd_svc_proc_t *svc_proc; ++ struct cds_list_head mux_svc; + }; + + int +@@ -69,4 +85,15 @@ glusterd_svc_reconfigure(int (*create_volfile)()); + int + glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event); + ++int ++glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *conn, ++ rpc_clnt_event_t event); ++ ++int ++glusterd_proc_get_pid(glusterd_proc_t *proc); ++ ++int ++glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc, ++ char *sockpath, int frame_timeout, ++ glusterd_muxsvc_conn_notify_t notify); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c +index 4dc0d44..23a9592 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tier.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tier.c +@@ -27,6 +27,7 @@ + #include "glusterd-messages.h" + #include "glusterd-mgmt.h" + #include "glusterd-syncop.h" ++#include "glusterd-shd-svc-helper.h" + + #include + #include +@@ -615,7 +616,7 @@ glusterd_op_remove_tier_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + + if (cmd == GF_DEFRAG_CMD_DETACH_START && + volinfo->status == GLUSTERD_STATUS_STARTED) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL, + "Unable to reconfigure NFS-Server"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c +index 04ceec5..ab463f1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c +@@ -83,7 +83,6 @@ glusterd_tierdsvc_init(void *data) + goto out; + + notify = glusterd_svc_common_rpc_notify; +- glusterd_store_perform_node_state_store(volinfo); + + volinfo->type = GF_CLUSTER_TYPE_TIER; + +@@ -395,6 +394,7 @@ int + glusterd_tierdsvc_restart() + { + glusterd_volinfo_t *volinfo = NULL; ++ glusterd_volinfo_t *tmp = NULL; + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; +@@ -405,7 +405,7 @@ glusterd_tierdsvc_restart() + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + +- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) ++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) + { + /* Start per volume tierd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED && +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c 
+index 52b83ec..ef664c2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -61,6 +61,7 @@ + #include "glusterd-server-quorum.h" + #include + #include ++#include "glusterd-shd-svc-helper.h" + + #include "xdr-generic.h" + #include +@@ -583,13 +584,17 @@ glusterd_volinfo_t * + glusterd_volinfo_unref(glusterd_volinfo_t *volinfo) + { + int refcnt = -1; ++ glusterd_conf_t *conf = THIS->private; + +- pthread_mutex_lock(&volinfo->reflock); ++ pthread_mutex_lock(&conf->volume_lock); + { +- refcnt = --volinfo->refcnt; ++ pthread_mutex_lock(&volinfo->reflock); ++ { ++ refcnt = --volinfo->refcnt; ++ } ++ pthread_mutex_unlock(&volinfo->reflock); + } +- pthread_mutex_unlock(&volinfo->reflock); +- ++ pthread_mutex_unlock(&conf->volume_lock); + if (!refcnt) { + glusterd_volinfo_delete(volinfo); + return NULL; +@@ -661,6 +666,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo) + glusterd_snapdsvc_build(&new_volinfo->snapd.svc); + glusterd_tierdsvc_build(&new_volinfo->tierd.svc); + glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc); ++ glusterd_shdsvc_build(&new_volinfo->shd.svc); + + pthread_mutex_init(&new_volinfo->reflock, NULL); + *volinfo = glusterd_volinfo_ref(new_volinfo); +@@ -1026,11 +1032,11 @@ glusterd_volinfo_delete(glusterd_volinfo_t *volinfo) + gf_store_handle_destroy(volinfo->snapd.handle); + + glusterd_auth_cleanup(volinfo); ++ glusterd_shd_svcproc_cleanup(&volinfo->shd); + + pthread_mutex_destroy(&volinfo->reflock); + GF_FREE(volinfo); + ret = 0; +- + out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +@@ -3619,6 +3625,7 @@ glusterd_spawn_daemons(void *opaque) + ret = glusterd_snapdsvc_restart(); + ret = glusterd_tierdsvc_restart(); + ret = glusterd_gfproxydsvc_restart(); ++ ret = glusterd_shdsvc_restart(); + return ret; + } + +@@ -4569,6 +4576,9 @@ glusterd_delete_stale_volume(glusterd_volinfo_t *stale_volinfo, + svc = &(stale_volinfo->snapd.svc); + (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT); + } ++ svc = &(stale_volinfo->shd.svc); ++ (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT); ++ + (void)glusterd_volinfo_remove(stale_volinfo); + + return 0; +@@ -4683,6 +4693,15 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + glusterd_volinfo_unref(old_volinfo); + } + ++ ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, ++ "Failed to store " ++ "volinfo for volume %s", ++ new_volinfo->volname); ++ goto out; ++ } ++ + if (glusterd_is_volume_started(new_volinfo)) { + (void)glusterd_start_bricks(new_volinfo); + if (glusterd_is_snapd_enabled(new_volinfo)) { +@@ -4691,15 +4710,10 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + } + } +- } +- +- ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, +- "Failed to store " +- "volinfo for volume %s", +- new_volinfo->volname); +- goto out; ++ svc = &(new_volinfo->shd.svc); ++ if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { ++ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); ++ } + } + + ret = glusterd_create_volfiles_and_notify_services(new_volinfo); +@@ -5174,9 +5188,7 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count, + glusterd_svc_build_pidfile_path(server, priv->rundir, pidfile, + sizeof(pidfile)); + +- if 
(strcmp(server, priv->shd_svc.name) == 0) +- svc = &(priv->shd_svc); +- else if (strcmp(server, priv->nfs_svc.name) == 0) ++ if (strcmp(server, priv->nfs_svc.name) == 0) + svc = &(priv->nfs_svc); + else if (strcmp(server, priv->quotad_svc.name) == 0) + svc = &(priv->quotad_svc); +@@ -5207,9 +5219,6 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count, + if (!strcmp(server, priv->nfs_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "NFS Server", + SLEN("NFS Server")); +- else if (!strcmp(server, priv->shd_svc.name)) +- ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon", +- SLEN("Self-heal Daemon")); + else if (!strcmp(server, priv->quotad_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "Quota Daemon", + SLEN("Quota Daemon")); +@@ -8773,6 +8782,21 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid) + "to stop snapd daemon service"); + } + } ++ ++ if (glusterd_is_shd_compatible_volume(volinfo)) { ++ /* ++ * Sending stop request for all volumes. So it is fine ++ * to send stop for mux shd ++ */ ++ svc = &(volinfo->shd.svc); ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, ++ "Failed " ++ "to stop shd daemon service"); ++ } ++ } ++ + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->stop(svc, SIGTERM); +@@ -8798,7 +8822,7 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid) + } + + /* Reconfigure all daemon services upon peer detach */ +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed to reconfigure all daemon services."); +@@ -14350,3 +14374,74 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo) + return _gf_true; + return _gf_false; + } ++ ++int32_t ++glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, ++ int32_t count) ++{ ++ int ret = -1; ++ int32_t pid = -1; ++ int32_t brick_online = -1; ++ char key[64] = {0}; ++ int keylen; ++ char *pidfile = NULL; ++ xlator_t *this = NULL; ++ char *uuid_str = NULL; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO(THIS->name, this, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, volinfo, out); ++ GF_VALIDATE_OR_GOTO(this->name, dict, out); ++ ++ keylen = snprintf(key, sizeof(key), "brick%d.hostname", count); ++ ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon", ++ SLEN("Self-heal Daemon")); ++ if (ret) ++ goto out; ++ ++ keylen = snprintf(key, sizeof(key), "brick%d.path", count); ++ uuid_str = gf_strdup(uuid_utoa(MY_UUID)); ++ if (!uuid_str) { ++ ret = -1; ++ goto out; ++ } ++ ret = dict_set_dynstrn(dict, key, keylen, uuid_str); ++ if (ret) ++ goto out; ++ uuid_str = NULL; ++ ++ /* shd doesn't have a port. but the cli needs a port key with ++ * a zero value to parse. ++ * */ ++ ++ keylen = snprintf(key, sizeof(key), "brick%d.port", count); ++ ret = dict_set_int32n(dict, key, keylen, 0); ++ if (ret) ++ goto out; ++ ++ pidfile = volinfo->shd.svc.proc.pidfile; ++ ++ brick_online = gf_is_service_running(pidfile, &pid); ++ ++ /* If shd is not running, then don't print the pid */ ++ if (!brick_online) ++ pid = -1; ++ keylen = snprintf(key, sizeof(key), "brick%d.pid", count); ++ ret = dict_set_int32n(dict, key, keylen, pid); ++ if (ret) ++ goto out; ++ ++ keylen = snprintf(key, sizeof(key), "brick%d.status", count); ++ ret = dict_set_int32n(dict, key, keylen, brick_online); ++ ++out: ++ if (uuid_str) ++ GF_FREE(uuid_str); ++ if (ret) ++ gf_msg(this ? 
this->name : "glusterd", GF_LOG_ERROR, 0, ++ GD_MSG_DICT_SET_FAILED, ++ "Returning %d. adding values to dict failed", ret); ++ ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 9bf19a6..3647c34 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -876,4 +876,8 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo); + + char * + search_brick_path_from_proc(pid_t brick_pid, char *brickpath); ++ ++int32_t ++glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, ++ int32_t count); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 1f53beb..324ec2f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -36,6 +36,7 @@ + #include "glusterd-svc-mgmt.h" + #include "glusterd-svc-helper.h" + #include "glusterd-snapd-svc-helper.h" ++#include "glusterd-shd-svc-helper.h" + #include "glusterd-gfproxyd-svc-helper.h" + + struct gd_validate_reconf_opts { +@@ -4845,7 +4846,7 @@ volgen_get_shd_key(int type) + static int + volgen_set_shd_key_enable(dict_t *set_dict, const int type) + { +- int ret = -1; ++ int ret = 0; + + switch (type) { + case GF_CLUSTER_TYPE_REPLICATE: +@@ -5136,24 +5137,15 @@ out: + static int + build_shd_volume_graph(xlator_t *this, volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, dict_t *mod_dict, +- dict_t *set_dict, gf_boolean_t graph_check, +- gf_boolean_t *valid_config) ++ dict_t *set_dict, gf_boolean_t graph_check) + { + volgen_graph_t cgraph = {0}; + int ret = 0; + int clusters = -1; + +- if (!graph_check && (volinfo->status != GLUSTERD_STATUS_STARTED)) +- goto out; +- + if (!glusterd_is_shd_compatible_volume(volinfo)) + goto out; + +- /* Shd graph is valid only when there is at least one +- * replica/disperse volume is present +- */ +- *valid_config = _gf_true; +- + ret = prepare_shd_volume_options(volinfo, mod_dict, set_dict); + if (ret) + goto out; +@@ -5183,19 +5175,16 @@ out: + } + + int +-build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict) ++build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, ++ dict_t *mod_dict) + { +- glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; +- glusterd_conf_t *priv = NULL; + dict_t *set_dict = NULL; + int ret = 0; +- gf_boolean_t valid_config = _gf_false; + xlator_t *iostxl = NULL; + gf_boolean_t graph_check = _gf_false; + + this = THIS; +- priv = this->private; + + set_dict = dict_new(); + if (!set_dict) { +@@ -5205,26 +5194,18 @@ build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict) + + if (mod_dict) + graph_check = dict_get_str_boolean(mod_dict, "graph-check", 0); +- iostxl = volgen_graph_add_as(graph, "debug/io-stats", "glustershd"); ++ iostxl = volgen_graph_add_as(graph, "debug/io-stats", volinfo->volname); + if (!iostxl) { + ret = -1; + goto out; + } + +- cds_list_for_each_entry(voliter, &priv->volumes, vol_list) +- { +- ret = build_shd_volume_graph(this, graph, voliter, mod_dict, set_dict, +- graph_check, &valid_config); +- ret = dict_reset(set_dict); +- if (ret) +- goto out; +- } ++ ret = build_shd_volume_graph(this, graph, volinfo, mod_dict, set_dict, ++ graph_check); + + out: + if (set_dict) + dict_unref(set_dict); +- if (!valid_config) +- ret = -EINVAL; + return ret; + } + +@@ -6541,6 +6522,10 @@ glusterd_create_volfiles(glusterd_volinfo_t *volinfo) + if (ret) + gf_log(this->name, GF_LOG_ERROR, "Could not generate gfproxy 
volfiles"); + ++ ret = glusterd_shdsvc_create_volfile(volinfo); ++ if (ret) ++ gf_log(this->name, GF_LOG_ERROR, "Could not generate shd volfiles"); ++ + dict_del_sizen(volinfo->dict, "skip-CLIOT"); + + out: +@@ -6621,7 +6606,7 @@ validate_shdopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + ret = dict_set_int32_sizen(val_dict, "graph-check", 1); + if (ret) + goto out; +- ret = build_shd_graph(&graph, val_dict); ++ ret = build_shd_graph(volinfo, &graph, val_dict); + if (!ret) + ret = graph_reconf_validateopt(&graph.graph, op_errstr); + +@@ -6998,3 +6983,22 @@ gd_is_boolean_option(char *key) + + return _gf_false; + } ++ ++int ++glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename, ++ dict_t *mode_dict) ++{ ++ int ret = -1; ++ volgen_graph_t graph = { ++ 0, ++ }; ++ ++ graph.type = GF_SHD; ++ ret = build_shd_graph(volinfo, &graph, mode_dict); ++ if (!ret) ++ ret = volgen_write_volfile(&graph, filename); ++ ++ volgen_graph_free(&graph); ++ ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h +index f9fc068..897d8fa 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.h ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h +@@ -66,6 +66,7 @@ typedef enum { + GF_REBALANCED = 1, + GF_QUOTAD, + GF_SNAPD, ++ GF_SHD, + } glusterd_graph_type_t; + + struct volgen_graph { +@@ -77,6 +78,8 @@ typedef struct volgen_graph volgen_graph_t; + + typedef int (*glusterd_graph_builder_t)(volgen_graph_t *graph, + dict_t *mod_dict); ++typedef int (*glusterd_vol_graph_builder_t)(glusterd_volinfo_t *, ++ char *filename, dict_t *mod_dict); + + #define COMPLETE_OPTION(key, completion, ret) \ + do { \ +@@ -201,7 +204,8 @@ void + glusterd_get_shd_filepath(char *filename); + + int +-build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict); ++build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, ++ dict_t *mod_dict); + + int + build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict); +@@ -313,4 +317,9 @@ glusterd_generate_gfproxyd_volfile(glusterd_volinfo_t *volinfo); + + int + glusterd_build_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *filename); ++ ++int ++glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename, ++ dict_t *mode_dict); ++ + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 1ea8ba6..4c3ad50 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1940,7 +1940,7 @@ static int + glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict, char **op_errstr) + { +- glusterd_conf_t *priv = NULL; ++ glusterd_svc_t *svc = NULL; + gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; + int ret = 0; + char msg[2408] = { +@@ -1950,7 +1950,6 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + "Self-heal daemon is not running. 
" + "Check self-heal daemon log file."; + +- priv = this->private; + ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), + (int32_t *)&heal_op); + if (ret) { +@@ -1959,6 +1958,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + ++ svc = &(volinfo->shd.svc); + switch (heal_op) { + case GF_SHD_OP_INVALID: + case GF_SHD_OP_HEAL_ENABLE: /* This op should be handled in volume-set*/ +@@ -1988,7 +1988,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + +- if (!priv->shd_svc.online) { ++ if (!svc->online) { + ret = -1; + *op_errstr = gf_strdup(offline_msg); + goto out; +@@ -2009,7 +2009,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + +- if (!priv->shd_svc.online) { ++ if (!svc->online) { + ret = -1; + *op_errstr = gf_strdup(offline_msg); + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index ff5af42..89afb9c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1533,14 +1533,6 @@ init(xlator_t *this) + exit(1); + } + +- ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_GLUSTERSHD_RUN_DIR); +- if (ret) { +- gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, +- "Unable to create " +- "glustershd running directory"); +- exit(1); +- } +- + ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_NFS_RUN_DIR); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, +@@ -1815,6 +1807,9 @@ init(xlator_t *this) + CDS_INIT_LIST_HEAD(&conf->snapshots); + CDS_INIT_LIST_HEAD(&conf->missed_snaps_list); + CDS_INIT_LIST_HEAD(&conf->brick_procs); ++ CDS_INIT_LIST_HEAD(&conf->shd_procs); ++ pthread_mutex_init(&conf->attach_lock, NULL); ++ pthread_mutex_init(&conf->volume_lock, NULL); + + pthread_mutex_init(&conf->mutex, NULL); + conf->rpc = rpc; +@@ -1895,7 +1890,6 @@ init(xlator_t *this) + glusterd_mgmt_v3_lock_timer_init(); + glusterd_txn_opinfo_dict_init(); + +- glusterd_shdsvc_build(&conf->shd_svc); + glusterd_nfssvc_build(&conf->nfs_svc); + glusterd_quotadsvc_build(&conf->quotad_svc); + glusterd_bitdsvc_build(&conf->bitd_svc); +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index e858ce4..0ac6e63 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -28,6 +28,7 @@ + #include "glusterd-sm.h" + #include "glusterd-snapd-svc.h" + #include "glusterd-tierd-svc.h" ++#include "glusterd-shd-svc.h" + #include "glusterd-bitd-svc.h" + #include "glusterd1-xdr.h" + #include "protocol-common.h" +@@ -170,7 +171,6 @@ typedef struct { + char workdir[VALID_GLUSTERD_PATHMAX]; + char rundir[VALID_GLUSTERD_PATHMAX]; + rpcsvc_t *rpc; +- glusterd_svc_t shd_svc; + glusterd_svc_t nfs_svc; + glusterd_svc_t bitd_svc; + glusterd_svc_t scrub_svc; +@@ -179,6 +179,7 @@ typedef struct { + struct cds_list_head volumes; + struct cds_list_head snapshots; /*List of snap volumes */ + struct cds_list_head brick_procs; /* List of brick processes */ ++ struct cds_list_head shd_procs; /* List of shd processes */ + pthread_mutex_t xprt_lock; + struct list_head xprt_list; + pthread_mutex_t import_volumes; +@@ -219,6 +220,11 @@ typedef struct { + gf_atomic_t blockers; + uint32_t mgmt_v3_lock_timeout; + gf_boolean_t restart_bricks; ++ pthread_mutex_t attach_lock; /* Lock can be per process or a common one */ ++ pthread_mutex_t volume_lock; /* We release the big_lock from lot of places ++ which might 
lead the modification of volinfo ++ list. ++ */ + } glusterd_conf_t; + + typedef enum gf_brick_status { +@@ -498,6 +504,7 @@ struct glusterd_volinfo_ { + + glusterd_snapdsvc_t snapd; + glusterd_tierdsvc_t tierd; ++ glusterd_shdsvc_t shd; + glusterd_gfproxydsvc_t gfproxyd; + int32_t quota_xattr_version; + gf_boolean_t stage_deleted; /* volume has passed staging +@@ -624,7 +631,6 @@ typedef enum { + #define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps" + #define GLUSTERD_BITD_RUN_DIR "/bitd" + #define GLUSTERD_SCRUB_RUN_DIR "/scrub" +-#define GLUSTERD_GLUSTERSHD_RUN_DIR "/glustershd" + #define GLUSTERD_NFS_RUN_DIR "/nfs" + #define GLUSTERD_QUOTAD_RUN_DIR "/quotad" + #define GLUSTER_SHARED_STORAGE_BRICK_DIR GLUSTERD_DEFAULT_WORKDIR "/ss_brick" +@@ -680,6 +686,26 @@ typedef ssize_t (*gd_serialize_t)(struct iovec outmsg, void *args); + } \ + } while (0) + ++#define GLUSTERD_GET_SHD_RUNDIR(path, volinfo, priv) \ ++ do { \ ++ int32_t _shd_dir_len; \ ++ _shd_dir_len = snprintf(path, PATH_MAX, "%s/shd/%s", priv->rundir, \ ++ volinfo->volname); \ ++ if ((_shd_dir_len < 0) || (_shd_dir_len >= PATH_MAX)) { \ ++ path[0] = 0; \ ++ } \ ++ } while (0) ++ ++#define GLUSTERD_GET_SHD_PID_FILE(path, volinfo, priv) \ ++ do { \ ++ int32_t _shd_pid_len; \ ++ _shd_pid_len = snprintf(path, PATH_MAX, "%s/shd/%s-shd.pid", \ ++ priv->rundir, volinfo->volname); \ ++ if ((_shd_pid_len < 0) || (_shd_pid_len >= PATH_MAX)) { \ ++ path[0] = 0; \ ++ } \ ++ } while (0) ++ + #define GLUSTERD_GET_VOLUME_PID_DIR(path, volinfo, priv) \ + do { \ + int32_t _vol_pid_len; \ +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 2d75714..19f5175 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -46,7 +46,6 @@ client_fini_complete(xlator_t *this) + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + clnt_conf_t *conf = this->private; +- + if (!conf->destroy) + return 0; + +@@ -69,6 +68,11 @@ client_notify_dispatch_uniq(xlator_t *this, int32_t event, void *data, ...) + return 0; + + return client_notify_dispatch(this, event, data); ++ ++ /* Please avoid any code that access xlator object here ++ * Because for a child down event, once we do the signal ++ * we will start cleanup. ++ */ + } + + int +@@ -105,6 +109,11 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...) + } + pthread_mutex_unlock(&ctx->notify_lock); + ++ /* Please avoid any code that access xlator object here ++ * Because for a child down event, once we do the signal ++ * we will start cleanup. ++ */ ++ + return ret; + } + +@@ -2272,6 +2281,7 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + { + xlator_t *this = NULL; + clnt_conf_t *conf = NULL; ++ gf_boolean_t is_parent_down = _gf_false; + int ret = 0; + + this = mydata; +@@ -2333,6 +2343,19 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + if (conf->portmap_err_logged) + conf->disconnect_err_logged = 1; + } ++ /* ++ * Once we complete the child down notification, ++ * There is a chance that the graph might get freed, ++ * So it is not safe to access any xlator contens ++ * So here we are checking whether the parent is down ++ * or not. ++ */ ++ pthread_mutex_lock(&conf->lock); ++ { ++ is_parent_down = conf->parent_down; ++ } ++ pthread_mutex_unlock(&conf->lock); ++ + /* If the CHILD_DOWN event goes to parent xlator + multiple times, the logic of parent xlator notify + may get screwed up.. (eg. 
CHILD_MODIFIED event in
+@@ -2340,6 +2363,12 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ to parent are genuine */
+ ret = client_notify_dispatch_uniq(this, GF_EVENT_CHILD_DOWN,
+ NULL);
++ if (is_parent_down) {
++ /* If parent is down, then there should not be any
++ * operation after a child down.
++ */
++ goto out;
++ }
+ if (ret)
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ PC_MSG_CHILD_DOWN_NOTIFY_FAILED,
+--
+1.8.3.1
+
diff --git a/SOURCES/0099-client-fini-return-fini-after-rpc-cleanup.patch b/SOURCES/0099-client-fini-return-fini-after-rpc-cleanup.patch
new file mode 100644
index 0000000..5cff104
--- /dev/null
+++ b/SOURCES/0099-client-fini-return-fini-after-rpc-cleanup.patch
@@ -0,0 +1,119 @@
+From d79cb2cdff6fe8d962c9ac095a7541ddf500302b Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC
+Date: Mon, 1 Apr 2019 14:44:20 +0530
+Subject: [PATCH 099/124] client/fini: return fini after rpc cleanup
+
+There is a race condition between the rpc_transport layer
+and client fini.
+
+Sequence of events that triggers the race condition:
+1) When we want to destroy a graph, we send a parent down
+ event first
+2) Once the parent down event is received on a client xlator, we
+ initiate an rpc disconnect
+3) This in turn generates a child down event.
+4) When we process child down, we first do fini for
+ every xlator
+5) On successful return of fini, we delete the graph
+
+After step 5, there is a chance that the fini
+on the client has not yet finished, because an rpc_transport
+ref can race with the above sequence.
+
+So we have to wait until all rpcs are successfully freed
+before returning from the client's fini.
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22468/
+
+>Change-Id: I20145662d71fb837e448a4d3210d1fcb2855f2d4
+>fixes: bz#1659708
+>Signed-off-by: Mohammed Rafi KC
+
+Change-Id: I848bcfb9443467caed32bae0717244ab01b407fc
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167831
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ xlators/protocol/client/src/client.c | 25 ++++++++++++++++++++-----
+ xlators/protocol/client/src/client.h | 6 ++++++
+ 2 files changed, 26 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index 19f5175..a372807 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -49,11 +49,12 @@ client_fini_complete(xlator_t *this)
+ if (!conf->destroy)
+ return 0;
+
+- this->private = NULL;
+-
+- pthread_spin_destroy(&conf->fd_lock);
+- pthread_mutex_destroy(&conf->lock);
+- GF_FREE(conf);
++ pthread_mutex_lock(&conf->lock);
++ {
++ conf->fini_completed = _gf_true;
++ pthread_cond_broadcast(&conf->fini_complete_cond);
++ }
++ pthread_mutex_unlock(&conf->lock);
+
+ out:
+ return 0;
+@@ -2721,6 +2722,7 @@ init(xlator_t *this)
+ goto out;
+
+ pthread_mutex_init(&conf->lock, NULL);
++ pthread_cond_init(&conf->fini_complete_cond, NULL);
+ pthread_spin_init(&conf->fd_lock, 0);
+ INIT_LIST_HEAD(&conf->saved_fds);
+
+@@ -2779,6 +2781,7 @@ fini(xlator_t *this)
+ if (!conf)
+ return;
+
++ conf->fini_completed = _gf_false;
+ conf->destroy = 1;
+ if (conf->rpc) {
+ /* cleanup the saved-frames before last unref */
+@@ -2786,6 +2789,18 @@ fini(xlator_t *this)
+ rpc_clnt_unref(conf->rpc);
+ }
+
++ pthread_mutex_lock(&conf->lock);
++ {
++ while (!conf->fini_completed)
++ pthread_cond_wait(&conf->fini_complete_cond, &conf->lock);
++ }
++ pthread_mutex_unlock(&conf->lock);
++
++ 
pthread_spin_destroy(&conf->fd_lock);
++ pthread_mutex_destroy(&conf->lock);
++ pthread_cond_destroy(&conf->fini_complete_cond);
++ GF_FREE(conf);
++
+ /* Saved Fds */
+ /* TODO: */
+
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index f12fa61..8dcd72f 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -235,6 +235,12 @@ typedef struct clnt_conf {
+ * up, disconnects can be
+ * logged
+ */
++
++ gf_boolean_t old_protocol; /* used only for old-protocol testing */
++ pthread_cond_t fini_complete_cond; /* Used to wait till we finish the fini
++ completely, i.e. for client_fini_complete
++ to return */
++ gf_boolean_t fini_completed;
+ } clnt_conf_t;
+
+ typedef struct _client_fd_ctx {
+--
+1.8.3.1
+
diff --git a/SOURCES/0100-clnt-rpc-ref-leak-during-disconnect.patch b/SOURCES/0100-clnt-rpc-ref-leak-during-disconnect.patch
new file mode 100644
index 0000000..0eb4b02
--- /dev/null
+++ b/SOURCES/0100-clnt-rpc-ref-leak-during-disconnect.patch
@@ -0,0 +1,179 @@
+From 4d95e271a9042bf2d789a4d900ad263b6ea47681 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC
+Date: Wed, 23 Jan 2019 21:55:01 +0530
+Subject: [PATCH 100/124] clnt/rpc: ref leak during disconnect.
+
+During disconnect cleanup, we are not cancelling the reconnect
+timer, which causes a ref leak each time a disconnect
+happens.
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22087/
+
+>Change-Id: I9d05d1f368d080e04836bf6a0bb018bf8f7b5b8a
+>updates: bz#1659708
+>Signed-off-by: Mohammed Rafi KC
+
+Change-Id: I5a2dbb17e663a4809bb4c435cacadbf0ab694a76
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167844
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ libglusterfs/src/timer.c | 16 +++++++----
+ rpc/rpc-lib/src/rpc-clnt.c | 11 +++++++-
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 32 ++++++++++++++++++----
+ 3 files changed, 47 insertions(+), 12 deletions(-)
+
+diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
+index d882543..2643c07 100644
+--- a/libglusterfs/src/timer.c
++++ b/libglusterfs/src/timer.c
+@@ -75,13 +75,13 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
+ if (ctx == NULL || event == NULL) {
+ gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+- return 0;
++ return -1;
+ }
+
+ if (ctx->cleanup_started) {
+ gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED,
+ "ctx cleanup started");
+- return 0;
++ return -1;
+ }
+
+ LOCK(&ctx->lock);
+@@ -93,10 +93,9 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
+ if (!reg) {
+ /* This can happen when cleanup may have just started and
+ * gf_timer_registry_destroy() sets ctx->timer to NULL.
+- * Just bail out as success as gf_timer_proc() takes
+- * care of cleaning up the events.
++ * gf_timer_proc() takes care of cleaning up the events.
+ */
+- return 0;
++ return -1;
+ }
+
+ LOCK(&reg->lock);
+@@ -203,6 +202,13 @@ gf_timer_proc(void *data)
+ list_for_each_entry_safe(event, tmp, &reg->active, list)
+ {
+ list_del(&event->list);
++ /* TODO Possible resource leak
++ * Before freeing the event, we need to call the respective
++ * event functions and free any resources.
++ * For example, in the case of rpc_clnt_reconnect, we need to
++ * unref the rpc object whose ref was taken when added to the timer
++ * wheel. 
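++ * Until that unref happens, refs such as the rpc_clnt ref taken in
++ * rpc_clnt_handle_disconnect before arming the reconnect timer are
++ * leaked on this path.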
++ */ + GF_FREE(event); + } + } +diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c +index 3f7bb3c..6f47515 100644 +--- a/rpc/rpc-lib/src/rpc-clnt.c ++++ b/rpc/rpc-lib/src/rpc-clnt.c +@@ -495,6 +495,7 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) + int unref = 0; + int ret = 0; + gf_boolean_t timer_unref = _gf_false; ++ gf_boolean_t reconnect_unref = _gf_false; + + if (!conn) { + goto out; +@@ -514,6 +515,12 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) + timer_unref = _gf_true; + conn->timer = NULL; + } ++ if (conn->reconnect) { ++ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect); ++ if (!ret) ++ reconnect_unref = _gf_true; ++ conn->reconnect = NULL; ++ } + + conn->connected = 0; + conn->disconnected = 1; +@@ -533,6 +540,8 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) + if (timer_unref) + rpc_clnt_unref(clnt); + ++ if (reconnect_unref) ++ rpc_clnt_unref(clnt); + out: + return 0; + } +@@ -830,7 +839,7 @@ rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn) + pthread_mutex_lock(&conn->lock); + { + if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) { +- ts.tv_sec = 10; ++ ts.tv_sec = 3; + ts.tv_nsec = 0; + + rpc_clnt_ref(clnt); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index 041946d..b3c4158 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -3364,6 +3364,25 @@ out: + return ret; + } + ++int ++glusterd_is_path_mounted(const char *path) ++{ ++ FILE *mtab = NULL; ++ struct mntent *part = NULL; ++ int is_mounted = 0; ++ ++ if ((mtab = setmntent("/etc/mtab", "r")) != NULL) { ++ while ((part = getmntent(mtab)) != NULL) { ++ if ((part->mnt_fsname != NULL) && ++ (strcmp(part->mnt_dir, path)) == 0) { ++ is_mounted = 1; ++ break; ++ } ++ } ++ endmntent(mtab); ++ } ++ return is_mounted; ++} + /* This function will do unmount for snaps. + */ + int32_t +@@ -3388,14 +3407,11 @@ glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo) + continue; + } + +- /* Fetch the brick mount path from the brickinfo->path */ +- ret = glusterd_get_brick_root(brickinfo->path, &brick_mount_path); ++ ret = glusterd_find_brick_mount_path(brickinfo->path, ++ &brick_mount_path); + if (ret) { +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BRICK_PATH_UNMOUNTED, ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL, + "Failed to find brick_mount_path for %s", brickinfo->path); +- /* There is chance that brick path is already +- * unmounted. 
*/ +- ret = 0; + goto out; + } + /* unmount cannot be done when the brick process is still in +@@ -3440,6 +3456,10 @@ glusterd_umount(const char *path) + GF_ASSERT(this); + GF_ASSERT(path); + ++ if (!glusterd_is_path_mounted(path)) { ++ return 0; ++ } ++ + runinit(&runner); + snprintf(msg, sizeof(msg), "umount path %s", path); + runner_add_args(&runner, _PATH_UMOUNT, "-f", path, NULL); +-- +1.8.3.1 + diff --git a/SOURCES/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch b/SOURCES/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch new file mode 100644 index 0000000..f8d0763 --- /dev/null +++ b/SOURCES/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch @@ -0,0 +1,162 @@ +From 0021a4bbc9af2bfe28d4a79f76c3cd33f23dd118 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Fri, 5 Apr 2019 12:33:55 +0530 +Subject: [PATCH 101/124] shd/mux: Fix coverity issues introduced by shd mux + patch + +CID 1400475: Null pointer dereferences (FORWARD_NULL) +CID 1400474: Null pointer dereferences (FORWARD_NULL) +CID 1400471: Code maintainability issues (UNUSED_VALUE) +CID 1400470: Null pointer dereferences (FORWARD_NULL) +CID 1400469: Memory - illegal accesses (USE_AFTER_FREE) +CID 1400467: Code maintainability issues (UNUSED_VALUE) + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22514/ + +>Change-Id: I0ca1c733be335c6e5844f44850f8066626ac40d4 +>updates: bz#789278 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: I0425efca9ab5a95801eff9e99259219449a16380 +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/167832 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/graph.c | 21 +++++++++++++-------- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 6 ++++++ + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 24 +++++++++++++++++------- + 3 files changed, 36 insertions(+), 15 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index a492dd8..4c8b02d 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1470,7 +1470,9 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + goto out; + parent_graph = ctx->active; + graph = volfile_obj->graph; +- if (graph && graph->first) ++ if (!graph) ++ goto out; ++ if (graph->first) + xl = graph->first; + + last_xl = graph->last_xl; +@@ -1591,12 +1593,10 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, + parent_graph->leaf_count += graph->leaf_count; + parent_graph->id++; + ++ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); + if (!volfile_obj) { +- volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); +- if (!volfile_obj) { +- ret = -1; +- goto out; +- } ++ ret = -1; ++ goto out; + } + + graph->used = 1; +@@ -1641,6 +1641,7 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + { + glusterfs_graph_t *oldvolfile_graph = NULL; + glusterfs_graph_t *newvolfile_graph = NULL; ++ char vol_id[NAME_MAX + 1]; + + int ret = -1; + +@@ -1672,6 +1673,9 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first); + + if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) { ++ ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id); ++ if (ret < 0) ++ goto out; + ret = glusterfs_process_svc_detach(ctx, volfile_obj); + if (ret) { + gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, +@@ -1680,8 
+1684,9 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ "old graph. Aborting the reconfiguration operation");
+ goto out;
+ }
+- ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp,
+- volfile_obj->vol_id, checksum);
++ volfile_obj = NULL;
++ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id,
++ checksum);
+ goto out;
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 937ea30..04a4b2e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -101,6 +101,8 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc);
+ ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s",
+ mux_conn->sockpath);
++ if (ret < 0)
++ goto out;
+ } else {
+ ret = mkdir_p(logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+@@ -663,6 +665,10 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+ glusterd_volinfo_ref(volinfo);
+ svc_proc->data = volinfo;
+ ret = glusterd_svc_stop(svc, sig);
++ if (ret) {
++ glusterd_volinfo_unref(volinfo);
++ goto out;
++ }
+ }
+ if (!empty && pid != -1) {
+ ret = glusterd_detach_svc(svc, volinfo, sig);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index e42703c..02945b1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -411,9 +411,14 @@ __gf_find_compatible_svc(gd_node_type daemon)
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+
+- if (daemon == GD_NODE_SHD) {
+- svc_procs = &conf->shd_procs;
+- if (!svc_procs)
++ switch (daemon) {
++ case GD_NODE_SHD: {
++ svc_procs = &conf->shd_procs;
++ if (!svc_procs)
++ goto out;
++ } break;
++ default:
++ /* Add support for other client daemons here */
+ goto out;
+ }
+
+@@ -540,11 +545,16 @@ __gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid)
+ if (!conf)
+ return NULL;
+
+- if (daemon == GD_NODE_SHD) {
+- svc_procs = &conf->shd_procs;
+- if (!svc_proc)
++ switch (daemon) {
++ case GD_NODE_SHD: {
++ svc_procs = &conf->shd_procs;
++ if (!svc_procs)
++ return NULL;
++ } break;
++ default:
++ /* Add support for other client daemons here */
+ return NULL;
+- } /* Can be moved to switch when mux is implemented for other daemon; */
++ }
+
+ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+ {
+--
+1.8.3.1
+
diff --git a/SOURCES/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch b/SOURCES/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch
new file mode 100644
index 0000000..39fe021
--- /dev/null
+++ b/SOURCES/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch
@@ -0,0 +1,737 @@
+From df6523ed3c5267624197b52edcb553fc2d8a08f2 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC
+Date: Tue, 26 Feb 2019 18:04:18 +0530
+Subject: [PATCH 102/124] rpc/transport: Missing a ref on dict while creating
+ transport object
+
+While creating the rpc_transport object, we store a dictionary without
+taking a ref on the dict, but an unref is done during the cleanup of the
+transport object.
+
+So the rpc layer expects the caller to take a ref on the dictionary
+before passing the dict to the rpc layer. This leads to a lot of confusion
+across the code base and to ref leaks.
+
+Semantically, this is not correct. 
It is the rpc layer responsibility +to take a ref when storing it, and free during the cleanup. + +I'm listing down the total issues or leaks across the code base because +of this confusion. These issues are currently present in the upstream +master. + +1) changelog_rpc_client_init + +2) quota_enforcer_init + +3) rpcsvc_create_listeners : when there are two transport, like tcp,rdma. + +4) quotad_aggregator_init + +5) glusterd: init + +6) nfs3_init_state + +7) server: init + +8) client:init + +This patch does the cleanup according to the semantics. + +Backport of : https://review.gluster.org/#/c/glusterfs/+/22266/ + +>Change-Id: I46373af9630373eb375ee6de0e6f2bbe2a677425 +>updates: bz#1659708 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: Iff978497e11592fbebfa4b683fdc56698b782859 +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/167847 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + api/src/glfs-mgmt.c | 10 ++++-- + cli/src/cli.c | 20 +++++++----- + glusterfsd/src/glusterfsd-mgmt.c | 18 ++++++++-- + rpc/rpc-lib/src/rpc-clnt.c | 2 -- + rpc/rpc-lib/src/rpc-transport.c | 38 +++++++--------------- + rpc/rpc-lib/src/rpc-transport.h | 4 +-- + rpc/rpc-lib/src/rpcsvc.c | 13 ++------ + rpc/rpc-lib/src/rpcsvc.h | 2 +- + .../features/changelog/src/changelog-rpc-common.c | 9 +++-- + .../snapview-server/src/snapview-server-mgmt.c | 8 ++++- + xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 8 ++++- + xlators/mgmt/glusterd/src/glusterd-handler.c | 18 ++++++---- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 8 ++++- + xlators/mgmt/glusterd/src/glusterd-utils.c | 9 +++-- + xlators/mgmt/glusterd/src/glusterd.c | 6 +++- + xlators/nfs/server/src/acl3.c | 5 +++ + xlators/nfs/server/src/mount3.c | 5 +++ + xlators/nfs/server/src/nlm4.c | 7 ++++ + 18 files changed, 119 insertions(+), 71 deletions(-) + +diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c +index d502b4f..7476d5b 100644 +--- a/api/src/glfs-mgmt.c ++++ b/api/src/glfs-mgmt.c +@@ -1015,6 +1015,10 @@ glfs_mgmt_init(struct glfs *fs) + if (ctx->mgmt) + return 0; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + if (cmd_args->volfile_server_port) + port = cmd_args->volfile_server_port; + +@@ -1029,11 +1033,11 @@ glfs_mgmt_init(struct glfs *fs) + + if (cmd_args->volfile_server_transport && + !strcmp(cmd_args->volfile_server_transport, "unix")) { +- ret = rpc_transport_unix_options_build(&options, host, 0); ++ ret = rpc_transport_unix_options_build(options, host, 0); + } else { + xlator_cmdline_option_t *opt = find_xlator_option_in_cmd_args_t( + "address-family", cmd_args); +- ret = rpc_transport_inet_options_build(&options, host, port, ++ ret = rpc_transport_inet_options_build(options, host, port, + (opt ? 
opt->value : NULL)); + } + +@@ -1075,5 +1079,7 @@ glfs_mgmt_init(struct glfs *fs) + + ret = rpc_clnt_start(rpc); + out: ++ if (options) ++ dict_unref(options); + return ret; + } +diff --git a/cli/src/cli.c b/cli/src/cli.c +index c33d152..ff39a98 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -661,9 +661,8 @@ cli_quotad_clnt_rpc_init(void) + + global_quotad_rpc = rpc; + out: +- if (ret) { +- if (rpc_opts) +- dict_unref(rpc_opts); ++ if (rpc_opts) { ++ dict_unref(rpc_opts); + } + return rpc; + } +@@ -685,6 +684,10 @@ cli_rpc_init(struct cli_state *state) + this = THIS; + cli_rpc_prog = &cli_prog; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + /* If address family specified in CLI */ + if (state->address_family) { + addr_family = state->address_family; +@@ -699,7 +702,7 @@ cli_rpc_init(struct cli_state *state) + "Connecting to glusterd using " + "sockfile %s", + state->glusterd_sock); +- ret = rpc_transport_unix_options_build(&options, state->glusterd_sock, ++ ret = rpc_transport_unix_options_build(options, state->glusterd_sock, + 0); + if (ret) + goto out; +@@ -709,10 +712,6 @@ cli_rpc_init(struct cli_state *state) + "%s", + state->remote_host); + +- options = dict_new(); +- if (!options) +- goto out; +- + ret = dict_set_str(options, "remote-host", state->remote_host); + if (ret) + goto out; +@@ -731,7 +730,7 @@ cli_rpc_init(struct cli_state *state) + gf_log("cli", GF_LOG_DEBUG, + "Connecting to glusterd using " + "default socket"); +- ret = rpc_transport_unix_options_build(&options, ++ ret = rpc_transport_unix_options_build(options, + DEFAULT_GLUSTERD_SOCKFILE, 0); + if (ret) + goto out; +@@ -749,6 +748,9 @@ cli_rpc_init(struct cli_state *state) + + ret = rpc_clnt_start(rpc); + out: ++ if (options) ++ dict_unref(options); ++ + if (ret) { + if (rpc) + rpc_clnt_unref(rpc); +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index a89c980..1d2cd1a 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -2781,7 +2781,11 @@ glusterfs_listener_init(glusterfs_ctx_t *ctx) + if (!cmd_args->sock_file) + return 0; + +- ret = rpcsvc_transport_unix_options_build(&options, cmd_args->sock_file); ++ options = dict_new(); ++ if (!options) ++ goto out; ++ ++ ret = rpcsvc_transport_unix_options_build(options, cmd_args->sock_file); + if (ret) + goto out; + +@@ -2808,6 +2812,8 @@ glusterfs_listener_init(glusterfs_ctx_t *ctx) + ctx->listener = rpc; + + out: ++ if (options) ++ dict_unref(options); + return ret; + } + +@@ -2889,6 +2895,10 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx) + if (ctx->mgmt) + return 0; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + LOCK_INIT(&ctx->volfile_lock); + + if (cmd_args->volfile_server_port) +@@ -2898,10 +2908,10 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx) + + if (cmd_args->volfile_server_transport && + !strcmp(cmd_args->volfile_server_transport, "unix")) { +- ret = rpc_transport_unix_options_build(&options, host, 0); ++ ret = rpc_transport_unix_options_build(options, host, 0); + } else { + opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args); +- ret = rpc_transport_inet_options_build(&options, host, port, ++ ret = rpc_transport_inet_options_build(options, host, port, + (opt ? 
opt->value : NULL)); + } + if (ret) +@@ -2950,6 +2960,8 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx) + + ret = rpc_clnt_start(rpc); + out: ++ if (options) ++ dict_unref(options); + return ret; + } + +diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c +index 6f47515..b04eaed 100644 +--- a/rpc/rpc-lib/src/rpc-clnt.c ++++ b/rpc/rpc-lib/src/rpc-clnt.c +@@ -1125,8 +1125,6 @@ rpc_clnt_new(dict_t *options, xlator_t *owner, char *name, + mem_pool_destroy(rpc->saved_frames_pool); + GF_FREE(rpc); + rpc = NULL; +- if (options) +- dict_unref(options); + goto out; + } + +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index 4beaaf9..bed1f8c 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -168,6 +168,11 @@ rpc_transport_cleanup(rpc_transport_t *trans) + if (trans->fini) + trans->fini(trans); + ++ if (trans->options) { ++ dict_unref(trans->options); ++ trans->options = NULL; ++ } ++ + GF_FREE(trans->name); + + if (trans->xl) +@@ -352,7 +357,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name) + } + } + +- trans->options = options; ++ trans->options = dict_ref(options); + + pthread_mutex_init(&trans->lock, NULL); + trans->xl = this; +@@ -591,19 +596,14 @@ out: + } + + int +-rpc_transport_unix_options_build(dict_t **options, char *filepath, ++rpc_transport_unix_options_build(dict_t *dict, char *filepath, + int frame_timeout) + { +- dict_t *dict = NULL; + char *fpath = NULL; + int ret = -1; + + GF_ASSERT(filepath); +- GF_ASSERT(options); +- +- dict = dict_new(); +- if (!dict) +- goto out; ++ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out); + + fpath = gf_strdup(filepath); + if (!fpath) { +@@ -638,20 +638,14 @@ rpc_transport_unix_options_build(dict_t **options, char *filepath, + if (ret) + goto out; + } +- +- *options = dict; + out: +- if (ret && dict) { +- dict_unref(dict); +- } + return ret; + } + + int +-rpc_transport_inet_options_build(dict_t **options, const char *hostname, +- int port, char *af) ++rpc_transport_inet_options_build(dict_t *dict, const char *hostname, int port, ++ char *af) + { +- dict_t *dict = NULL; + char *host = NULL; + int ret = -1; + #ifdef IPV6_DEFAULT +@@ -660,13 +654,9 @@ rpc_transport_inet_options_build(dict_t **options, const char *hostname, + char *addr_family = "inet"; + #endif + +- GF_ASSERT(options); + GF_ASSERT(hostname); + GF_ASSERT(port >= 1024); +- +- dict = dict_new(); +- if (!dict) +- goto out; ++ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out); + + host = gf_strdup((char *)hostname); + if (!host) { +@@ -702,12 +692,6 @@ rpc_transport_inet_options_build(dict_t **options, const char *hostname, + "failed to set trans-type with socket"); + goto out; + } +- +- *options = dict; + out: +- if (ret && dict) { +- dict_unref(dict); +- } +- + return ret; + } +diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h +index 9e75d1a..64b7e9b 100644 +--- a/rpc/rpc-lib/src/rpc-transport.h ++++ b/rpc/rpc-lib/src/rpc-transport.h +@@ -303,11 +303,11 @@ rpc_transport_keepalive_options_set(dict_t *options, int32_t interval, + int32_t time, int32_t timeout); + + int +-rpc_transport_unix_options_build(dict_t **options, char *filepath, ++rpc_transport_unix_options_build(dict_t *options, char *filepath, + int frame_timeout); + + int +-rpc_transport_inet_options_build(dict_t **options, const char *hostname, ++rpc_transport_inet_options_build(dict_t *options, const char *hostname, + int port, char *af); + + void +diff --git a/rpc/rpc-lib/src/rpcsvc.c 
b/rpc/rpc-lib/src/rpcsvc.c +index 74373c4..5a35139 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -2615,18 +2615,13 @@ rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options) + } + + int +-rpcsvc_transport_unix_options_build(dict_t **options, char *filepath) ++rpcsvc_transport_unix_options_build(dict_t *dict, char *filepath) + { +- dict_t *dict = NULL; + char *fpath = NULL; + int ret = -1; + + GF_ASSERT(filepath); +- GF_ASSERT(options); +- +- dict = dict_new(); +- if (!dict) +- goto out; ++ GF_VALIDATE_OR_GOTO("rpcsvc", dict, out); + + fpath = gf_strdup(filepath); + if (!fpath) { +@@ -2649,13 +2644,9 @@ rpcsvc_transport_unix_options_build(dict_t **options, char *filepath) + ret = dict_set_str(dict, "transport-type", "socket"); + if (ret) + goto out; +- +- *options = dict; + out: + if (ret) { + GF_FREE(fpath); +- if (dict) +- dict_unref(dict); + } + return ret; + } +diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h +index 34045ce..a51edc7 100644 +--- a/rpc/rpc-lib/src/rpcsvc.h ++++ b/rpc/rpc-lib/src/rpcsvc.h +@@ -665,7 +665,7 @@ rpcsvc_actor_t * + rpcsvc_program_actor(rpcsvc_request_t *req); + + int +-rpcsvc_transport_unix_options_build(dict_t **options, char *filepath); ++rpcsvc_transport_unix_options_build(dict_t *options, char *filepath); + int + rpcsvc_set_allow_insecure(rpcsvc_t *svc, dict_t *options); + int +diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c +index cf35175..dcdcfb1 100644 +--- a/xlators/features/changelog/src/changelog-rpc-common.c ++++ b/xlators/features/changelog/src/changelog-rpc-common.c +@@ -47,7 +47,7 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile, + if (!options) + goto error_return; + +- ret = rpc_transport_unix_options_build(&options, sockfile, 0); ++ ret = rpc_transport_unix_options_build(options, sockfile, 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR, + "failed to build rpc options"); +@@ -73,6 +73,7 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile, + goto dealloc_rpc_clnt; + } + ++ dict_unref(options); + return rpc; + + dealloc_rpc_clnt: +@@ -303,7 +304,11 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata, + if (!cbkdata) + cbkdata = this; + +- ret = rpcsvc_transport_unix_options_build(&options, sockfile); ++ options = dict_new(); ++ if (!options) ++ return NULL; ++ ++ ret = rpcsvc_transport_unix_options_build(options, sockfile); + if (ret) + goto dealloc_dict; + +diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c +index b608cdf..bc415ef 100644 +--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c ++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c +@@ -101,8 +101,12 @@ svs_mgmt_init(xlator_t *this) + if (cmd_args->volfile_server) + host = cmd_args->volfile_server; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args); +- ret = rpc_transport_inet_options_build(&options, host, port, ++ ret = rpc_transport_inet_options_build(options, host, port, + (opt != NULL ? 
opt->value : NULL)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_BUILD_TRNSPRT_OPT_FAILED, +@@ -145,6 +149,8 @@ svs_mgmt_init(xlator_t *this) + gf_msg_debug(this->name, 0, "svs mgmt init successful"); + + out: ++ if (options) ++ dict_unref(options); + if (ret) + if (priv) { + rpc_clnt_connection_cleanup(&priv->rpc->conn); +diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +index 052438c..16eefa1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +@@ -29,6 +29,10 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, + if (!this) + goto out; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + svc = glusterd_conn_get_svc_object(conn); + if (!svc) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, +@@ -36,7 +40,7 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, + goto out; + } + +- ret = rpc_transport_unix_options_build(&options, sockpath, frame_timeout); ++ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout); + if (ret) + goto out; + +@@ -66,6 +70,8 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, + conn->rpc = rpc; + conn->notify = notify; + out: ++ if (options) ++ dict_unref(options); + if (ret) { + if (rpc) { + rpc_clnt_unref(rpc); +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 1cb9013..6147995 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3493,11 +3493,10 @@ out: + } + + int +-glusterd_transport_inet_options_build(dict_t **options, const char *hostname, ++glusterd_transport_inet_options_build(dict_t *dict, const char *hostname, + int port, char *af) + { + xlator_t *this = NULL; +- dict_t *dict = NULL; + int32_t interval = -1; + int32_t time = -1; + int32_t timeout = -1; +@@ -3505,14 +3504,14 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname, + + this = THIS; + GF_ASSERT(this); +- GF_ASSERT(options); ++ GF_ASSERT(dict); + GF_ASSERT(hostname); + + if (!port) + port = GLUSTERD_DEFAULT_PORT; + + /* Build default transport options */ +- ret = rpc_transport_inet_options_build(&dict, hostname, port, af); ++ ret = rpc_transport_inet_options_build(dict, hostname, port, af); + if (ret) + goto out; + +@@ -3552,7 +3551,6 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname, + if ((interval > 0) || (time > 0)) + ret = rpc_transport_keepalive_options_set(dict, interval, time, + timeout); +- *options = dict; + out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +@@ -3572,6 +3570,10 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + if (!peerctx) + goto out; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + if (args) + peerctx->args = *args; + +@@ -3586,7 +3588,7 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + if (ret) + gf_log(this->name, GF_LOG_TRACE, + "option transport.address-family is not set in xlator options"); +- ret = glusterd_transport_inet_options_build(&options, peerinfo->hostname, ++ ret = glusterd_transport_inet_options_build(options, peerinfo->hostname, + peerinfo->port, af); + if (ret) + goto out; +@@ -3596,6 +3598,7 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + * create our RPC endpoint with the same address that 
the peer would + * use to reach us. + */ ++ + if (this->options) { + data = dict_getn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address")); +@@ -3637,6 +3640,9 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + peerctx = NULL; + ret = 0; + out: ++ if (options) ++ dict_unref(options); ++ + GF_FREE(peerctx); + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index ed5ded5..cbed9a9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -391,6 +391,10 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + if (!defrag) + goto out; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo); + /* Check if defrag sockfile exists in the new location + * in /var/run/ , if it does not try the old location +@@ -420,7 +424,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. + */ +- ret = rpc_transport_unix_options_build(&options, sockfile, 600); ++ ret = rpc_transport_unix_options_build(options, sockfile, 600); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL, + "Unix options build failed"); +@@ -437,6 +441,8 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + } + ret = 0; + out: ++ if (options) ++ dict_unref(options); + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ef664c2..2dd5f91 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1980,7 +1980,11 @@ glusterd_brick_connect(glusterd_volinfo_t *volinfo, + * The default timeout of 30mins used for unreliable network + * connections is too long for unix domain socket connections. + */ +- ret = rpc_transport_unix_options_build(&options, socketpath, 600); ++ options = dict_new(); ++ if (!options) ++ goto out; ++ ++ ret = rpc_transport_unix_options_build(options, socketpath, 600); + if (ret) + goto out; + +@@ -1999,7 +2003,8 @@ glusterd_brick_connect(glusterd_volinfo_t *volinfo, + brickinfo->rpc = rpc; + } + out: +- ++ if (options) ++ dict_unref(options); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index 89afb9c..d4ab630 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1111,11 +1111,15 @@ glusterd_init_uds_listener(xlator_t *this) + + GF_ASSERT(this); + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + sock_data = dict_get(this->options, "glusterd-sockfile"); + (void)snprintf(sockfile, sizeof(sockfile), "%s", + sock_data ? 
sock_data->data : DEFAULT_GLUSTERD_SOCKFILE);
+
+- ret = rpcsvc_transport_unix_options_build(&options, sockfile);
++ ret = rpcsvc_transport_unix_options_build(options, sockfile);
+ if (ret)
+ goto out;
+
+diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c
+index 0eca45d..2ede24b 100644
+--- a/xlators/nfs/server/src/acl3.c
++++ b/xlators/nfs/server/src/acl3.c
+@@ -787,9 +787,14 @@ acl3svc_init(xlator_t *nfsx)
+ goto err;
+ }
+
++ if (options)
++ dict_unref(options);
++
+ acl3_inited = _gf_true;
+ return &acl3prog;
+ err:
++ if (options)
++ dict_unref(options);
+ return NULL;
+ }
+
+diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
+index 726dc29..396809c 100644
+--- a/xlators/nfs/server/src/mount3.c
++++ b/xlators/nfs/server/src/mount3.c
+@@ -4102,8 +4102,13 @@ mnt3svc_init(xlator_t *nfsx)
+ gf_msg_debug(GF_MNT, GF_LOG_DEBUG, "Thread creation failed");
+ }
+ }
++ if (options)
++ dict_unref(options);
++
+ return &mnt3prog;
+ err:
++ if (options)
++ dict_unref(options);
+ return NULL;
+ }
+
+diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c
+index a341ebd..c3c1453 100644
+--- a/xlators/nfs/server/src/nlm4.c
++++ b/xlators/nfs/server/src/nlm4.c
+@@ -1121,6 +1121,8 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame)
+ ret = 0;
+
+ err:
++ if (options)
++ dict_unref(options);
+ if (ret == -1) {
+ if (rpc_clnt)
+ rpc_clnt_unref(rpc_clnt);
+@@ -2708,8 +2710,13 @@ nlm4svc_init(xlator_t *nfsx)
+
+ gf_timer_call_after(nfsx->ctx, timeout, nlm_grace_period_over, NULL);
+ nlm4_inited = _gf_true;
++
++ if (options)
++ dict_unref(options);
+ return &nlm4prog;
+ err:
++ if (options)
++ dict_unref(options);
+ return NULL;
+ }
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0103-dht-NULL-check-before-setting-error-flag.patch b/SOURCES/0103-dht-NULL-check-before-setting-error-flag.patch
new file mode 100644
index 0000000..addd4f7
--- /dev/null
+++ b/SOURCES/0103-dht-NULL-check-before-setting-error-flag.patch
@@ -0,0 +1,43 @@
+From 45c9eeb5544738d4d1d0aefb8a7f61e5d8859ad8 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC
+Date: Tue, 12 Mar 2019 18:00:37 +0530
+Subject: [PATCH 103/124] dht: NULL check before setting error flag
+
+The function dht_common_mark_mdsxattr blindly sets a value through
+an integer pointer without validating it. In fact, there are
+two callers of this function that pass a NULL value for the
+same pointer, which leads to a crash. 
+ +Backport of : https://review.gluster.org/#/c/22345/ + +>Change-Id: Id94ffe216f6a21f007b3291bff0b1e1c1989075c +>fixes: bz#1687811 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: Id9785c16184fd80e8184e5ae135fb63bf44692cd +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/167846 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/cluster/dht/src/dht-common.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 367548f..2a68193 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -852,7 +852,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, + "Failed to get hashed subvol for path %s" + "gfid is %s ", + local->loc.path, gfid_local); +- (*errst) = 1; ++ if (errst) ++ (*errst) = 1; + ret = -1; + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch b/SOURCES/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch new file mode 100644 index 0000000..214ccb4 --- /dev/null +++ b/SOURCES/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch @@ -0,0 +1,151 @@ +From faaaa3452ceec6afcc18cffc9beca3fe19841cce Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Thu, 3 Jan 2019 17:44:18 +0530 +Subject: [PATCH 104/124] afr/shd: Cleanup self heal daemon resources during + afr fini + +We were not properly cleaning self-heal daemon resources +during afr fini. This patch will clean the same. + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22151/ + +>Change-Id: I597860be6f781b195449e695d871b8667a418d5a +>updates: bz#1659708 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: I7be981b9c2476c8cacadea6b14d74234f67b714f +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/167845 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/syncop-utils.c | 8 +++++ + xlators/cluster/afr/src/afr-self-heald.c | 2 ++ + xlators/cluster/afr/src/afr.c | 57 ++++++++++++++++++++++++++++++++ + 3 files changed, 67 insertions(+) + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index be03527..b842142 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -350,6 +350,11 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + gf_boolean_t cond_init = _gf_false; + gf_boolean_t mut_init = _gf_false; + gf_dirent_t entries; ++ xlator_t *this = NULL; ++ ++ if (frame) { ++ this = frame->this; ++ } + + /*For this functionality to be implemented in general, we need + * synccond_t infra which doesn't block the executing thread. 
Until then +@@ -397,6 +402,9 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + + list_for_each_entry_safe(entry, tmp, &entries.list, list) + { ++ if (this && this->cleanup_starting) ++ goto out; ++ + list_del_init(&entry->list); + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) { + gf_dirent_entry_free(entry); +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 7eb1207..8bc4720 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -373,6 +373,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer) + + time(&event->start_time); + event->end_time = 0; ++ _mask_cancellation(); + } + + void +@@ -394,6 +395,7 @@ afr_shd_sweep_done(struct subvol_healer *healer) + + if (eh_save_history(shd->statistics[healer->subvol], history) < 0) + GF_FREE(history); ++ _unmask_cancellation(); + } + + int +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 33258a0..a0a7551 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -611,13 +611,70 @@ init(xlator_t *this) + out: + return ret; + } ++void ++afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer) ++{ ++ int ret = -1; ++ ++ if (!healer) ++ return; ++ ++ if (healer->running) { ++ /* ++ * If there are any resources to cleanup, We need ++ * to do that gracefully using pthread_cleanup_push ++ */ ++ ret = gf_thread_cleanup_xint(healer->thread); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED, ++ "Failed to clean up healer threads."); ++ healer->thread = 0; ++ } ++ pthread_cond_destroy(&healer->cond); ++ pthread_mutex_destroy(&healer->mutex); ++} ++ ++void ++afr_selfheal_daemon_fini(xlator_t *this) ++{ ++ struct subvol_healer *healer = NULL; ++ afr_self_heald_t *shd = NULL; ++ afr_private_t *priv = NULL; ++ int i = 0; ++ ++ priv = this->private; ++ if (!priv) ++ return; ++ ++ shd = &priv->shd; ++ if (!shd->iamshd) ++ return; ++ ++ for (i = 0; i < priv->child_count; i++) { ++ healer = &shd->index_healers[i]; ++ afr_destroy_healer_object(this, healer); + ++ healer = &shd->full_healers[i]; ++ afr_destroy_healer_object(this, healer); ++ ++ if (shd->statistics[i]) ++ eh_destroy(shd->statistics[i]); ++ } ++ GF_FREE(shd->index_healers); ++ GF_FREE(shd->full_healers); ++ GF_FREE(shd->statistics); ++ if (shd->split_brain) ++ eh_destroy(shd->split_brain); ++} + void + fini(xlator_t *this) + { + afr_private_t *priv = NULL; + + priv = this->private; ++ ++ afr_selfheal_daemon_fini(this); ++ + LOCK(&priv->lock); + if (priv->timer != NULL) { + gf_timer_call_cancel(this->ctx, priv->timer); +-- +1.8.3.1 + diff --git a/SOURCES/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch b/SOURCES/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch new file mode 100644 index 0000000..a735794 --- /dev/null +++ b/SOURCES/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch @@ -0,0 +1,336 @@ +From 023854d5573211d4737eb0ebe7ec954a7b7bb4ee Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 15 Apr 2019 10:34:34 +0530 +Subject: [PATCH 105/124] core: Log level changes do not effect on running + client process + +Problem: commit c34e4161f3cb6539ec83a9020f3d27eb4759a975 set log-level + per xlator during reconfigure only for a brick process not for + the client process. 
+
+Solution: 1) Change the per xlator log-level only if brick_mux is enabled. To
+ make sure brick multiplex is on, introduce a brick_mux flag in
+ ctx->cmd_args.
+
+Note: There are two other changes done with this patch
+ 1) Ignore the client-log-level option while attaching a brick with
+ an already running brick if brick_mux is enabled
+ 2) Add a log to print the pid of the running process to make
+ debugging easier
+
+> Change-Id: I39e85de778e150d0685cd9a79425ce8b4783f9c9
+> Signed-off-by: Mohit Agrawal
+> Fixes: bz#1696046
+> (Cherry picked from commit 798aadbe51a9a02dd98a0f861cc239ecf7c8ed57)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22495/)
+
+Change-Id: If82cc8e51cf00bd50d3321d31ec420f89786ea02
+Fixes: bz#1695081
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167828
+Tested-by: Mohit Agrawal
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ glusterfsd/src/glusterfsd-messages.h | 2 +-
+ glusterfsd/src/glusterfsd.c | 20 ++++-
+ glusterfsd/src/glusterfsd.h | 1 +
+ libglusterfs/src/glusterfs/glusterfs.h | 1 +
+ tests/bugs/glusterd/bug-1696046.t | 113 +++++++++++++++++++++++++++++
+ xlators/debug/io-stats/src/io-stats.c | 22 +++---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 7 ++
+ 7 files changed, 152 insertions(+), 14 deletions(-)
+ create mode 100644 tests/bugs/glusterd/bug-1696046.t
+
+diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h
+index 94312a5..280624c 100644
+--- a/glusterfsd/src/glusterfsd-messages.h
++++ b/glusterfsd/src/glusterfsd-messages.h
+@@ -36,6 +36,6 @@ GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3,
+ glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33,
+ glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36,
+ glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39,
+- glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42);
++ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42, glusterfsd_msg_43);
+
+ #endif /* !_GLUSTERFSD_MESSAGES_H_ */
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 3aa89ca..6aee4c1 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -85,8 +85,7 @@ static char gf_doc[] = "";
+ static char argp_doc[] =
+ "--volfile-server=SERVER [MOUNT-POINT]\n"
+ "--volfile=VOLFILE [MOUNT-POINT]";
+-const char *argp_program_version =
+- PACKAGE_NAME" "PACKAGE_VERSION;
++const char *argp_program_version = PACKAGE_NAME " " PACKAGE_VERSION;
+ const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
+
+ static error_t
+@@ -266,6 +265,7 @@ static struct argp_option gf_options[] = {
+ "attribute, dentry and page-cache. "
+ "Disable this only if same files/directories are not accessed across "
+ "two different mounts concurrently [default: \"on\"]"},
++ {"brick-mux", ARGP_BRICK_MUX_KEY, 0, 0, "Enable brick mux. 
"}, + {0, 0, 0, 0, "Miscellaneous Options:"}, + { + 0, +@@ -702,7 +702,6 @@ create_fuse_mount(glusterfs_ctx_t *ctx) + xlator_t *master = NULL; + + cmd_args = &ctx->cmd_args; +- + if (!cmd_args->mount_point) { + gf_msg_trace("glusterfsd", 0, + "mount point not found, not a client process"); +@@ -1090,6 +1089,10 @@ parse_opts(int key, char *arg, struct argp_state *state) + cmd_args->thin_client = _gf_true; + break; + ++ case ARGP_BRICK_MUX_KEY: ++ cmd_args->brick_mux = _gf_true; ++ break; ++ + case ARGP_PID_FILE_KEY: + cmd_args->pid_file = gf_strdup(arg); + break; +@@ -1207,7 +1210,6 @@ parse_opts(int key, char *arg, struct argp_state *state) + case ARGP_KEY_ARG: + if (state->arg_num >= 1) + argp_usage(state); +- + cmd_args->mount_point = gf_strdup(arg); + break; + +@@ -2540,6 +2542,8 @@ postfork: + if (ret) + goto out; + } ++ gf_log("glusterfs", GF_LOG_INFO, "Pid of current running process is %d", ++ getpid()); + ret = gf_log_inject_timer_event(ctx); + + glusterfs_signals_setup(ctx); +@@ -2787,6 +2791,14 @@ main(int argc, char *argv[]) + if (ret) + goto out; + ++ /* set brick_mux mode only for server process */ ++ if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) { ++ gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43, ++ "command line argument --brick-mux is valid only for brick " ++ "process"); ++ goto out; ++ } ++ + /* log the version of glusterfs running here along with the actual + command line options. */ + { +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index 35cf6d8..fa55789 100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -111,6 +111,7 @@ enum argp_option_keys { + ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, + ARGP_FUSE_LRU_LIMIT_KEY = 190, + ARGP_FUSE_AUTO_INVAL_KEY = 191, ++ ARGP_BRICK_MUX_KEY = 192 + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index deec5ba..fb727fc 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -575,6 +575,7 @@ struct _cmd_args { + + int fuse_flush_handle_interrupt; + int fuse_auto_inval; ++ bool brick_mux; + }; + typedef struct _cmd_args cmd_args_t; + +diff --git a/tests/bugs/glusterd/bug-1696046.t b/tests/bugs/glusterd/bug-1696046.t +new file mode 100644 +index 0000000..e1c1eb2 +--- /dev/null ++++ b/tests/bugs/glusterd/bug-1696046.t +@@ -0,0 +1,113 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++function count_up_bricks { ++ $CLI --xml volume status $1 | grep '1' | wc -l ++} ++ ++function count_brick_processes { ++ pgrep glusterfsd | wc -l ++} ++ ++logdir=`gluster --print-logdir` ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume set all cluster.brick-multiplex on ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3}; ++TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{1,2,3}; ++ ++## Start volume and verify ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $CLI volume start $V1; ++EXPECT 'Started' volinfo_field $V1 'Status'; ++ ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V1 ++ ++EXPECT 1 count_brick_processes ++ ++# Mount V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++function client-log-file-name() ++{ ++ logfilename=$M0".log" ++ echo ${logfilename:1} | tr / - ++} ++ ++function brick-log-file-name() ++{ ++ logfilename=$B0"/"$V0"1.log" ++ echo ${logfilename:1} | tr / - ++} ++ ++log_file=$logdir"/"`client-log-file-name` ++nofdlog=$(cat $log_file | grep " D " | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++brick_log_file=$logdir"/bricks/"`brick-log-file-name` ++nofdlog=$(cat $brick_log_file | grep " D " | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++## Set brick-log-level to DEBUG ++TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG ++ ++# Do some operation ++touch $M0/file1 ++ ++# Check debug message debug message should be exist only for V0 ++# Server xlator is common in brick_mux so after enabling DEBUG log ++# some debug message should be available for other xlators like posix ++ ++brick_log_file=$logdir"/bricks/"`brick-log-file-name` ++nofdlog=$(cat $brick_log_file | grep file1 | grep -v server | wc -l) ++TEST [ $((nofdlog)) -ne 0 ] ++ ++#Check if any debug log exist in client-log file ++nofdlog=$(cat $log_file | grep " D " | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++## Set brick-log-level to INFO ++TEST $CLI volume set $V0 diagnostics.brick-log-level INFO ++ ++## Set client-log-level to DEBUG ++TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG ++ ++# Do some operation ++touch $M0/file2 ++ ++nofdlog=$(cat $brick_log_file | grep " D " | grep file2 | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++nofdlog=$(cat $log_file | grep " D " | wc -l) ++TEST [ $((nofdlog)) -ne 0 ] ++ ++# Unmount V0 ++TEST umount $M0 ++ ++#Mount V1 ++TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++#do some operation ++touch $M0/file3 ++ ++ ++# DEBUG log level is enabled only for V0 so no debug message should be available ++# in log specific to file2 creation except for server xlator, server xlator is ++# common xlator in brick mulitplex ++nofdlog=$(cat $brick_log_file | grep file3 | grep -v server | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++# Unmount V1 ++TEST umount $M0 ++ ++cleanup; +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index 41b57c5..aa91a0a 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -3704,19 +3704,23 @@ xlator_set_loglevel(xlator_t *this, int log_level) + active = ctx->active; + top = active->first; + +- if (strcmp(top->type, "protocol/server") || (log_level == -1)) ++ if (log_level == -1) + return; + +- /* Set log-level for server xlator */ +- top->loglevel = log_level; ++ if (ctx->cmd_args.brick_mux) { ++ /* 
Set log-level for all brick xlators */
++ top->loglevel = log_level;
+
+- /* Set log-level for parent xlator */
+- if (this->parents)
+- this->parents->xlator->loglevel = log_level;
++ /* Set log-level for parent xlator */
++ if (this->parents)
++ this->parents->xlator->loglevel = log_level;
+
+- while (trav) {
+- trav->loglevel = log_level;
+- trav = trav->next;
++ while (trav) {
++ trav->loglevel = log_level;
++ trav = trav->next;
++ }
++ } else {
++ gf_log_set_loglevel(this->ctx, log_level);
+ }
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 2dd5f91..fdd7d91 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -2240,6 +2240,9 @@ retry:
+ if (volinfo->memory_accounting)
+ runner_add_arg(&runner, "--mem-accounting");
+
++ if (is_brick_mx_enabled())
++ runner_add_arg(&runner, "--brick-mux");
++
+ runner_log(&runner, "", 0, "Starting GlusterFS");
+
+ brickinfo->port = port;
+@@ -2378,6 +2381,10 @@ unsafe_option(dict_t *this, char *key, data_t *value, void *arg)
+ return _gf_false;
+ }
+
++ if (fnmatch("*diagnostics.client-log*", key, 0) == 0) {
++ return _gf_false;
++ }
++
+ return _gf_true;
+ }
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch b/SOURCES/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch
new file mode 100644
index 0000000..6788ba8
--- /dev/null
+++ b/SOURCES/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch
@@ -0,0 +1,111 @@
+From 55d945603bb52f0787c5200118673d6206ec3492 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar
+Date: Fri, 12 Apr 2019 19:55:10 +0530
+Subject: [PATCH 106/124] libgfchangelog: use find_library to locate shared
+ library
+
+Issue:
+
+libgfchangelog.so: cannot open shared object file
+
+Because of the hardcoded shared library name, the runtime loader looks
+for a particular version of the shared library.
+
+Solution:
+
+Using find_library to locate the shared library at runtime solves this issue. 
+ +Traceback (most recent call last): + File "/usr/libexec/glusterfs/python/syncdaemon/gsyncd.py", line 323, in main + func(args) + File "/usr/libexec/glusterfs/python/syncdaemon/subcmds.py", line 82, in subcmd_worker + local.service_loop(remote) + File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", line 1261, in service_loop + changelog_agent.init() + File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 233, in __call__ + return self.ins(self.meth, *a) + File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 215, in __call__ + raise res +OSError: libgfchangelog.so: cannot open shared object file: No such file or directory + +>Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22557/ +>Change-Id: I3dd013d701ed1cd99ba7ef20d1898f343e1db8f5 +>fixes: bz#1699394 +>Signed-off-by: Sunny Kumar + +fixes: bz#1699271 +Change-Id: If8b5827cdac658eb3a211109bd397db9a6fee8e6 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/167907 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + geo-replication/syncdaemon/libgfchangelog.py | 3 ++- + tools/glusterfind/src/libgfchangelog.py | 7 +++---- + xlators/features/changelog/lib/examples/python/libgfchangelog.py | 4 +++- + 3 files changed, 8 insertions(+), 6 deletions(-) + +diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py +index fff9d24..8d12956 100644 +--- a/geo-replication/syncdaemon/libgfchangelog.py ++++ b/geo-replication/syncdaemon/libgfchangelog.py +@@ -10,13 +10,14 @@ + + import os + from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong ++from ctypes.util import find_library + from syncdutils import ChangelogException, ChangelogHistoryNotAvailable + from py2py3 import gr_cl_history_changelog, gr_cl_done, gr_create_string_buffer + from py2py3 import gr_cl_register, gr_cl_history_done, bytearray_to_str + + + class Changes(object): +- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL, ++ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, + use_errno=True) + + @classmethod +diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py +index 1ef177a..513bb10 100644 +--- a/tools/glusterfind/src/libgfchangelog.py ++++ b/tools/glusterfind/src/libgfchangelog.py +@@ -9,8 +9,8 @@ + # cases as published by the Free Software Foundation. 
+ + import os +-from ctypes import CDLL, get_errno, create_string_buffer, c_ulong, byref +-from ctypes import RTLD_GLOBAL ++from ctypes import CDLL, RTLD_GLOBAL, get_errno, create_string_buffer, c_ulong, byref ++from ctypes.util import find_library + from gfind_py2py3 import bytearray_to_str, gf_create_string_buffer + from gfind_py2py3 import gfind_history_changelog, gfind_changelog_register + from gfind_py2py3 import gfind_history_changelog_done +@@ -19,8 +19,7 @@ from gfind_py2py3 import gfind_history_changelog_done + class ChangelogException(OSError): + pass + +- +-libgfc = CDLL("libgfchangelog.so", use_errno=True, mode=RTLD_GLOBAL) ++libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True) + + + def raise_oserr(prefix=None): +diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py +index 2cdbf11..2da9f2d 100644 +--- a/xlators/features/changelog/lib/examples/python/libgfchangelog.py ++++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py +@@ -1,8 +1,10 @@ + import os + from ctypes import * ++from ctypes.util import find_library + + class Changes(object): +- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL, use_errno=True) ++ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, ++ use_errno=True) + + @classmethod + def geterrno(cls): +-- +1.8.3.1 + diff --git a/SOURCES/0107-gfapi-add-function-to-set-client-pid.patch b/SOURCES/0107-gfapi-add-function-to-set-client-pid.patch new file mode 100644 index 0000000..741f2f3 --- /dev/null +++ b/SOURCES/0107-gfapi-add-function-to-set-client-pid.patch @@ -0,0 +1,93 @@ +From 799a74e5e8123cd2e67e9ed5c0f986630a8e0547 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 14 Mar 2019 18:41:11 +0530 +Subject: [PATCH 107/124] gfapi: add function to set client-pid + +This api offers the ability to set the pid of a client to a particular +value, identical to how gluster fuse clients provide the --client-pid +option. This is an internal API to be used by gluster processes only. See +https://lists.gluster.org/pipermail/gluster-devel/2019-March/055925.html +for more details. Currently glfsheal is the only proposed consumer. + +Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22368/ +Change-Id: I0620be2127d79d69cdd57cffb29bba44e6e5da1f +BUG 1676495 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/166459 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + api/src/gfapi.aliases | 1 + + api/src/gfapi.map | 4 ++++ + api/src/glfs-internal.h | 6 ++++++ + api/src/glfs.c | 15 +++++++++++++++ + 4 files changed, 26 insertions(+) + +diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases +index 25e2d74..09c0fd8 100644 +--- a/api/src/gfapi.aliases ++++ b/api/src/gfapi.aliases +@@ -172,6 +172,7 @@ _pub_glfs_upcall_lease_get_lease_type _glfs_upcall_lease_get_lease_type$GFAPI_4. 
+ + _priv_glfs_statx _glfs_statx$GFAPI_6.0 + _priv_glfs_iatt_from_statx _glfs_iatt_from_statx$GFAPI_6.0 ++_priv_glfs_setfspid _glfs_setfspid$GFAPI_6.1 + + _pub_glfs_read_async _glfs_read_async$GFAPI_6.0 + _pub_glfs_write_async _glfs_write_async$GFAPI_6.0 +diff --git a/api/src/gfapi.map b/api/src/gfapi.map +index bb201c7..b97a614 100644 +--- a/api/src/gfapi.map ++++ b/api/src/gfapi.map +@@ -267,3 +267,7 @@ GFAPI_6.0 { + glfs_fsetattr; + } GFAPI_PRIVATE_6.0; + ++GFAPI_PRIVATE_6.1 { ++ global: ++ glfs_setfspid; ++} GFAPI_6.0; +diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h +index 40bbb8a..55401b2 100644 +--- a/api/src/glfs-internal.h ++++ b/api/src/glfs-internal.h +@@ -702,4 +702,10 @@ void + glfs_iatt_from_statx(struct iatt *, const struct glfs_stat *) + GFAPI_PRIVATE(glfs_iatt_from_statx, 6.0); + ++/* ++ * This API is a per thread setting, similar to glfs_setfs{u/g}id, because of ++ * the call to syncopctx_setfspid. ++ */ ++int ++glfs_setfspid(struct glfs *, pid_t) GFAPI_PRIVATE(glfs_setfspid, 6.1); + #endif /* !_GLFS_INTERNAL_H */ +diff --git a/api/src/glfs.c b/api/src/glfs.c +index b741f6e..f4a8e08 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -1461,6 +1461,21 @@ invalid_fs: + + GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0); + ++int ++priv_glfs_setfspid(struct glfs *fs, pid_t pid) ++{ ++ cmd_args_t *cmd_args = NULL; ++ int ret = 0; ++ ++ cmd_args = &fs->ctx->cmd_args; ++ cmd_args->client_pid = pid; ++ cmd_args->client_pid_set = 1; ++ ret = syncopctx_setfspid(&pid); ++ ++ return ret; ++} ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1); ++ + void + pub_glfs_free(void *ptr) + { +-- +1.8.3.1 + diff --git a/SOURCES/0108-afr-add-client-pid-to-all-gf_event-calls.patch b/SOURCES/0108-afr-add-client-pid-to-all-gf_event-calls.patch new file mode 100644 index 0000000..eda9dd9 --- /dev/null +++ b/SOURCES/0108-afr-add-client-pid-to-all-gf_event-calls.patch @@ -0,0 +1,225 @@ +From ba1460a4fee0c41c7d7f7a2043bae37f7e751259 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Fri, 15 Mar 2019 19:31:03 +0530 +Subject: [PATCH 108/124] afr: add client-pid to all gf_event() calls + +client-pid for glustershd is GF_CLIENT_PID_SELF_HEALD +client-pid for glfsheal is GF_CLIENT_PID_GLFS_HEALD + +Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22369/ +BUG: 1676495 +Change-Id: Ib3a863af160ff48c822a5e6b0c27c575c9887470 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/166460 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + heal/src/glfs-heal.c | 6 ++++++ + xlators/cluster/afr/src/afr-common.c | 12 ++++++++---- + xlators/cluster/afr/src/afr-self-heal-common.c | 11 +++++++---- + xlators/cluster/afr/src/afr-self-heal-data.c | 4 +++- + xlators/cluster/afr/src/afr-self-heal-entry.c | 5 +++-- + xlators/cluster/afr/src/afr-self-heal-metadata.c | 4 +++- + xlators/cluster/afr/src/afr-self-heal-name.c | 7 ++++--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 10 ++++++++++ + 8 files changed, 44 insertions(+), 15 deletions(-) + +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 6030de3..7e37e47 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -1688,6 +1688,12 @@ main(int argc, char **argv) + goto out; + } + ++ ret = glfs_setfspid(fs, GF_CLIENT_PID_GLFS_HEAL); ++ if (ret) { ++ printf("Setting client pid failed, %s\n", strerror(errno)); ++ goto out; ++ } ++ + ret = glfs_init(fs); + if (ret < 0) { + ret = -errno; +diff --git a/xlators/cluster/afr/src/afr-common.c 
b/xlators/cluster/afr/src/afr-common.c +index 47a5d3a..3690b84 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5233,7 +5233,8 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator, + "Subvolume '%s' came back up; " + "going online.", + child_xlator->name); +- gf_event(EVENT_AFR_SUBVOL_UP, "subvol=%s", this->name); ++ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s", ++ this->ctx->cmd_args.client_pid, this->name); + } else { + *event = GF_EVENT_SOME_DESCENDENT_UP; + } +@@ -5310,7 +5311,8 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx, + "All subvolumes are down. Going " + "offline until at least one of them " + "comes back up."); +- gf_event(EVENT_AFR_SUBVOLS_DOWN, "subvol=%s", this->name); ++ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s", ++ this->ctx->cmd_args.client_pid, this->name); + } else { + *event = GF_EVENT_SOME_DESCENDENT_DOWN; + } +@@ -5585,12 +5587,14 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2) + if (!had_quorum && has_quorum) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET, + "Client-quorum is met"); +- gf_event(EVENT_AFR_QUORUM_MET, "subvol=%s", this->name); ++ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s", ++ this->ctx->cmd_args.client_pid, this->name); + } + if (had_quorum && !has_quorum) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL, + "Client-quorum is not met"); +- gf_event(EVENT_AFR_QUORUM_FAIL, "subvol=%s", this->name); ++ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s", ++ this->ctx->cmd_args.client_pid, this->name); + } + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 2268761..595bed4 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -383,11 +383,12 @@ out: + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2), + priv->children[src_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;type=gfid;file=" + "/%s>;count=2;child-%d=%s;gfid-%d=%s;" + "child-%d=%s;gfid-%d=%s", +- this->name, uuid_utoa(pargfid), bname, child_idx, +- priv->children[child_idx]->name, child_idx, ++ this->ctx->cmd_args.client_pid, this->name, uuid_utoa(pargfid), ++ bname, child_idx, priv->children[child_idx]->name, child_idx, + uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx, + priv->children[src_idx]->name, src_idx, + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2)); +@@ -2296,11 +2297,13 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + priv->children[i]->name, + uuid_utoa(replies[i].poststat.ia_gfid)); + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;" + "type=file;gfid=%s;" + "ia_type-%d=%s;ia_type-%d=%s", +- this->name, uuid_utoa(replies[i].poststat.ia_gfid), +- first_idx, gf_inode_type_to_str(first.ia_type), i, ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(replies[i].poststat.ia_gfid), first_idx, ++ gf_inode_type_to_str(first.ia_type), i, + gf_inode_type_to_str(replies[i].poststat.ia_type)); + ret = -EIO; + goto out; +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index d9a0ee3..18a0334 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -537,9 +537,11 @@ __afr_selfheal_data_finalize_source( + replies, AFR_DATA_TRANSACTION); + 
if (source < 0) { + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;type=data;" + "file=%s", +- this->name, uuid_utoa(inode->gfid)); ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(inode->gfid)); + return -EIO; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index b23ed6a..fc09b4c 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -269,11 +269,12 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this, + gf_inode_type_to_str(replies[src_idx].poststat.ia_type), + priv->children[src_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;type=file;" + "file=/%s>;count=2;child-%d=%s;type-" + "%d=%s;child-%d=%s;type-%d=%s", +- this->name, uuid_utoa(pargfid), bname, i, +- priv->children[i]->name, i, ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(pargfid), bname, i, priv->children[i]->name, i, + gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx, + priv->children[src_idx]->name, src_idx, + gf_inode_type_to_str(replies[src_idx].poststat.ia_type)); +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index a661fcb..ba43341 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -242,9 +242,11 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + + if (!priv->metadata_splitbrain_forced_heal) { + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;" + "type=metadata;file=%s", +- this->name, uuid_utoa(inode->gfid)); ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(inode->gfid)); + return -EIO; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index c4df5d4..36640b5 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -222,13 +222,14 @@ afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies, + gf_inode_type_to_str(inode_type), + priv->children[type_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;type=file;" + "file=/%s;count=2;" + "child-%d=%s;type-%d=%s;child-%d=%s;" + "type-%d=%s", +- this->name, uuid_utoa(pargfid), bname, i, +- priv->children[i]->name, i, +- gf_inode_type_to_str(inode_type1), type_idx, ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(pargfid), bname, i, priv->children[i]->name, ++ i, gf_inode_type_to_str(inode_type1), type_idx, + priv->children[type_idx]->name, type_idx, + gf_inode_type_to_str(inode_type)); + return -EIO; +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 04a4b2e..19eca9f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -324,6 +324,7 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) + { + int ret = -1; + char glusterd_uuid_option[PATH_MAX] = {0}; ++ char client_pid[32] = {0}; + dict_t *cmdline = NULL; + + cmdline = dict_new(); +@@ -335,6 +336,15 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) + if (ret < 0) + goto out; + ++ ret = snprintf(client_pid, sizeof(client_pid), "--client-pid=%d", ++ GF_CLIENT_PID_SELF_HEALD); ++ if (ret < 0) ++ goto out; ++ ++ ret = dict_set_str(cmdline, "arg", client_pid); ++ if (ret < 0) ++ goto out; ++ + /* 
Pass cmdline arguments as key-value pair. The key is merely
+ * a carrier and is not used. Since dictionary follows LIFO the value
+ * should be put in reverse order*/
+--
+1.8.3.1
+
diff --git a/SOURCES/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch b/SOURCES/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch
new file mode 100644
index 0000000..ed912ea
--- /dev/null
+++ b/SOURCES/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch
@@ -0,0 +1,613 @@
+From aff18f761ef64d55635daa9a1d2140fe35632820 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Fri, 29 Mar 2019 11:48:32 +0530
+Subject: [PATCH 109/124] glusterd: Optimize glusterd handshaking code path
+
+Problem: At the time of handshaking, glusterd populates volume
+ data in a dictionary. While more than 1500 volumes are
+ configured, glusterd takes more than 10 min to generate
+ the data. Due to taking more time, rpc requests time out
+ and rpc starts bailing out call frames.
+
+Solution: To optimize the code, the below changes are done
+ 1) Spawn multiple threads to populate volume data in bulk
+ in separate dictionaries and introduce an option
+ glusterd.vol_count_per_thread to configure the no. of
+ volumes handled by each thread.
+ 2) Populate tier data only when the volume type is tier
+ 3) Compare snap data only when snap_count is non-zero
+
+> Fixes: bz#1699339
+> Change-Id: I38dc71970c049217f9d1a06fc0aaf4c26eab18f5
+> Signed-off-by: Mohit Agrawal
+> (Cherry picked from commit 26a19d9da3ab5604db02d4ca02ce868fb57193a4)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22556/)
+
+Bug: 1652461
+Change-Id: Ia81671a7e1f173bcb32da9dc439be9e61c18bde1
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167981
+Tested-by: Mohit Agrawal
+Reviewed-by: Atin Mukherjee
+Tested-by: RHGS Build Bot
+---
+ libglusterfs/src/glusterfs/globals.h | 4 +-
+ tests/bugs/glusterd/bug-1699339.t | 69 ++++++
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 +
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 3 +
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 269 +++++++++++++++++----
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 55 +++++
+ xlators/mgmt/glusterd/src/glusterd.h | 10 +
+ 7 files changed, 362 insertions(+), 49 deletions(-)
+ create mode 100644 tests/bugs/glusterd/bug-1699339.t
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 6642ba0..e45db14 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -50,7 +50,7 @@
+ 1 /* MIN is the fresh start op-version, mostly \
+ should not change */
+ #define GD_OP_VERSION_MAX \
+- GD_OP_VERSION_6_0 /* MAX VERSION is the maximum \
++ GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \
+ count in VME table, should \
+ keep changing with \
+ introduction of newer \
+@@ -134,6 +134,8 @@
+
+ #define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */
+
++#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
++
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+
+diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t
+new file mode 100644
+index 0000000..3e950f4
+--- /dev/null
++++ b/tests/bugs/glusterd/bug-1699339.t
+@@ -0,0 +1,69 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. 
$(dirname $0)/../../cluster.rc ++ ++cleanup; ++ ++NUM_VOLS=15 ++ ++ ++get_brick_base () { ++ printf "%s/vol%02d" $B0 $1 ++} ++ ++function count_up_bricks { ++ vol=$1; ++ $CLI_1 --xml volume status $vol | grep '1' | wc -l ++} ++ ++create_volume () { ++ ++ local vol_name=$(printf "%s-vol%02d" $V0 $1) ++ ++ TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name} ++ TEST $CLI_1 volume start $vol_name ++} ++ ++TEST launch_cluster 3 ++TEST $CLI_1 volume set all cluster.brick-multiplex on ++ ++# The option accepts the value in the range from 5 to 200 ++TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210 ++TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4 ++ ++TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5 ++ ++TEST $CLI_1 peer probe $H2; ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count ++ ++TEST $CLI_1 peer probe $H3; ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++ ++# Our infrastructure can't handle an arithmetic expression here. The formula ++# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other ++# NUM_VOLS-1 and there are 5 such statements in each iteration. ++TESTS_EXPECTED_IN_LOOP=28 ++for i in $(seq 1 $NUM_VOLS); do ++ starttime="$(date +%s)"; ++ create_volume $i ++done ++ ++TEST kill_glusterd 1 ++ ++vol1=$(printf "%s-vol%02d" $V0 1) ++TEST $CLI_2 volume set $vol1 performance.readdir-ahead on ++vol2=$(printf "%s-vol%02d" $V0 2) ++TEST $CLI_2 volume set $vol2 performance.readdir-ahead on ++ ++# Bring back 1st glusterd ++TEST $glusterd_1 ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++ ++EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead ++ ++vol_name=$(printf "%s-vol%02d" $V0 2) ++EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead ++ ++cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 95f9707..94a5e1f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -87,6 +87,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { + * TBD: Discuss the default value for this. 
Maybe this should be a + * dynamic value depending on the memory specifications per node */ + {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE}, ++ {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE}, + /*{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/ + {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"}, + {NULL}, +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index b3c4158..d225854 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -2099,6 +2099,9 @@ glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername, + goto out; + } + ++ if (!snap_count) ++ goto out; ++ + for (i = 1; i <= snap_count; i++) { + /* Compare one snapshot from peer_data at a time */ + ret = glusterd_compare_snap(peer_data, i, peername, peerid); +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index fdd7d91..ff6102b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -155,6 +155,47 @@ out: + return ret; + } + ++int ++get_gd_vol_thread_limit(int *thread_limit) ++{ ++ char *value = NULL; ++ int ret = -1; ++ int vol_per_thread_limit = 0; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ ++ if (!is_brick_mx_enabled()) { ++ vol_per_thread_limit = 1; ++ ret = 0; ++ goto out; ++ } ++ ++ ret = dict_get_strn(priv->opts, GLUSTERD_VOL_CNT_PER_THRD, ++ SLEN(GLUSTERD_VOL_CNT_PER_THRD), &value); ++ if (ret) { ++ value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE; ++ } ++ ret = gf_string2int(value, &vol_per_thread_limit); ++ if (ret) ++ goto out; ++ ++out: ++ *thread_limit = vol_per_thread_limit; ++ ++ gf_msg_debug("glusterd", 0, ++ "Per Thread volume limit set to %d glusterd to populate dict " ++ "data parallel", ++ *thread_limit); ++ ++ return ret; ++} ++ + extern struct volopt_map_entry glusterd_volopt_map[]; + extern glusterd_all_vol_opts valid_all_vol_opts[]; + +@@ -3070,50 +3111,55 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, + + /* tiering related variables */ + +- snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count); +- if (ret) +- goto out; ++ if (volinfo->type == GF_CLUSTER_TYPE_TIER) { ++ snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_disperse_count); +- if (ret) +- 
goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count); ++ ret = dict_set_uint32(dict, key, ++ volinfo->tier_info.cold_disperse_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_redundancy_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count); ++ ret = dict_set_uint32(dict, key, ++ volinfo->tier_info.cold_redundancy_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_dist_leaf_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count); ++ ret = dict_set_uint32(dict, key, ++ volinfo->tier_info.cold_dist_leaf_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count); ++ if (ret) ++ goto out; ++ } + + snprintf(key, sizeof(key), "%s%d", prefix, count); + ret = gd_add_vol_snap_details_to_dict(dict, key, volinfo); +@@ -3363,33 +3409,40 @@ out: + return ret; + } + +-int32_t +-glusterd_add_volumes_to_export_dict(dict_t **peer_data) ++void * ++glusterd_add_bulk_volumes_create_thread(void *data) + { + int32_t ret = -1; +- dict_t *dict = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t count = 0; +- glusterd_dict_ctx_t ctx = {0}; + xlator_t *this = NULL; ++ glusterd_add_dict_args_t *arg = NULL; ++ dict_t *dict = NULL; ++ int start = 0; ++ int end = 0; + +- this = THIS; +- GF_ASSERT(this); ++ GF_ASSERT(data); ++ ++ arg = data; ++ dict = arg->voldict; ++ start = arg->start; ++ end = arg->end; ++ this = arg->this; ++ THIS = arg->this; + priv = this->private; + GF_ASSERT(priv); + +- dict = dict_new(); +- if (!dict) +- goto out; +- + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; ++ if ((count < start) || (count > end)) ++ continue; ++ + ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); + if (ret) + goto out; +- if (!glusterd_is_volume_quota_enabled(volinfo)) ++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + continue; + ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, + "volume"); +@@ -3397,7 +3450,122 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + goto out; + } + +- ret = dict_set_int32n(dict, "count", SLEN("count"), count); ++out: ++ GF_ATOMIC_DEC(priv->thread_count); ++ free(arg); ++ return NULL; ++} ++ ++int32_t ++glusterd_add_volumes_to_export_dict(dict_t **peer_data) ++{ ++ int32_t ret = -1; ++ dict_t 
*dict = NULL; ++ dict_t *dict_arr[128] = { ++ 0, ++ }; ++ glusterd_conf_t *priv = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ int32_t count = 0; ++ glusterd_dict_ctx_t ctx = {0}; ++ xlator_t *this = NULL; ++ int totthread = 0; ++ int volcnt = 0; ++ int start = 1; ++ int endindex = 0; ++ int vol_per_thread_limit = 0; ++ glusterd_add_dict_args_t *arg = NULL; ++ pthread_t th_id = { ++ 0, ++ }; ++ int th_ret = 0; ++ int i = 0; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ priv = this->private; ++ GF_ASSERT(priv); ++ ++ dict = dict_new(); ++ if (!dict) ++ goto out; ++ ++ /* Count the total number of volumes */ ++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++; ++ ++ get_gd_vol_thread_limit(&vol_per_thread_limit); ++ ++ if ((vol_per_thread_limit == 1) || (vol_per_thread_limit > 100)) { ++ totthread = 0; ++ } else { ++ totthread = volcnt / vol_per_thread_limit; ++ endindex = volcnt % vol_per_thread_limit; ++ if (endindex) ++ totthread++; ++ } ++ ++ if (totthread == 0) { ++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) ++ { ++ count++; ++ ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); ++ if (ret) ++ goto out; ++ ++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) ++ continue; ++ ++ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, ++ "volume"); ++ if (ret) ++ goto out; ++ } ++ } else { ++ for (i = 0; i < totthread; i++) { ++ arg = calloc(1, sizeof(*arg)); ++ dict_arr[i] = dict_new(); ++ arg->this = this; ++ arg->voldict = dict_arr[i]; ++ arg->start = start; ++ if (!endindex) { ++ arg->end = ((i + 1) * vol_per_thread_limit); ++ } else { ++ arg->end = (start + endindex); ++ } ++ th_ret = gf_thread_create_detached( ++ &th_id, glusterd_add_bulk_volumes_create_thread, arg, ++ "bulkvoldict"); ++ if (th_ret) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "glusterd_add_bulk_volume %s" ++ " thread creation failed", ++ "bulkvoldict"); ++ free(arg); ++ goto out; ++ } ++ ++ start = start + vol_per_thread_limit; ++ GF_ATOMIC_INC(priv->thread_count); ++ gf_log(this->name, GF_LOG_INFO, ++ "Create thread %d to populate dict data for volume" ++ " start index is %d end index is %d", ++ (i + 1), arg->start, arg->end); ++ } ++ while (GF_ATOMIC_GET(priv->thread_count)) { ++ sleep(1); ++ } ++ ++ gf_log(this->name, GF_LOG_INFO, ++ "Finished dictionary popluation in all threads"); ++ for (i = 0; i < totthread; i++) { ++ dict_copy_with_ref(dict_arr[i], dict); ++ dict_unref(dict_arr[i]); ++ } ++ gf_log(this->name, GF_LOG_INFO, ++ "Finished merger of all dictionraies into single one"); ++ } ++ ++ ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt); + if (ret) + goto out; + +@@ -3499,6 +3667,9 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + goto out; + } + ++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) ++ goto skip_quota; ++ + snprintf(key, sizeof(key), "volume%d.quota-version", count); + ret = dict_get_uint32(peer_data, key, "a_version); + if (ret) { +@@ -3550,6 +3721,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + goto out; + } + } ++ ++skip_quota: + *status = GLUSTERD_VOL_COMP_SCS; + + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 42ca9bb..10aa2ae 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1058,6 +1058,51 @@ out: + } + + static int ++validate_volume_per_thread_limit(glusterd_volinfo_t *volinfo, dict_t *dict, ++ char *key, char 
*value, char **op_errstr) ++{ ++ xlator_t *this = NULL; ++ uint val = 0; ++ int ret = -1; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ++ if (!is_brick_mx_enabled()) { ++ gf_asprintf(op_errstr, ++ "Brick-multiplexing is not enabled. " ++ "Please enable brick multiplexing before trying " ++ "to set this option."); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s", ++ *op_errstr); ++ goto out; ++ } ++ ++ ret = gf_string2uint(value, &val); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "%s is not a valid count. " ++ "%s expects an unsigned integer.", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", ++ *op_errstr); ++ } ++ ++ if ((val < 5) || (val > 200)) { ++ gf_asprintf( ++ op_errstr, ++ "Please set this option to a greater than 5 or less than 200 " ++ "to optimize dict generated while no. of volumes are more"); ++ ret = -1; ++ goto out; ++ } ++out: ++ gf_msg_debug("glusterd", 0, "Returning %d", ret); ++ ++ return ret; ++} ++ ++static int + validate_boolean(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) + { +@@ -3520,6 +3565,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "brick multiplexing. Brick multiplexing ensures that " + "compatible brick instances can share one single " + "brick process."}, ++ {.key = GLUSTERD_VOL_CNT_PER_THRD, ++ .voltype = "mgmt/glusterd", ++ .value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE, ++ .op_version = GD_OP_VERSION_7_0, ++ .validate_fn = validate_volume_per_thread_limit, ++ .type = GLOBAL_NO_DOC, ++ .description = ++ "This option can be used to limit the number of volumes " ++ "handled by per thread to populate peer data.The option accepts " ++ " the value in the range of 5 to 200"}, + {.key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", + .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE, +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 0ac6e63..bd9f509 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -57,8 +57,10 @@ + #define GLUSTER_SHARED_STORAGE "gluster_shared_storage" + #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage" + #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" ++#define GLUSTERD_VOL_CNT_PER_THRD "glusterd.vol_count_per_thread" + #define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process" + #define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250" ++#define GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE "100" + #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging" + #define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level" + +@@ -225,8 +227,16 @@ typedef struct { + which might lead the modification of volinfo + list. 
+ */ ++ gf_atomic_t thread_count; + } glusterd_conf_t; + ++typedef struct glusterd_add_dict_args { ++ xlator_t *this; ++ dict_t *voldict; ++ int start; ++ int end; ++} glusterd_add_dict_args_t; ++ + typedef enum gf_brick_status { + GF_BRICK_STOPPED, + GF_BRICK_STARTED, +-- +1.8.3.1 + diff --git a/SOURCES/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch b/SOURCES/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch new file mode 100644 index 0000000..eedac5e --- /dev/null +++ b/SOURCES/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch @@ -0,0 +1,108 @@ +From 6e7d333625ecd9f7402c2e839338350fa86eaf45 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham +Date: Tue, 16 Apr 2019 17:07:37 +0530 +Subject: [PATCH 110/124] tier/shd/glusterd: with shd mux, the shd volfile path + have to be updated for tier-heald.t + +The volfile path for glustershd has been changed to volume based +from node based with the shd mux. And those changes for the +tier-heald.t test case have been made in this patch. + +label: DOWNSTREAM ONLY + +Change-Id: I0137f7e02c2bf3721dd51c6dfb215cd81b31d6ef +Signed-off-by: Hari Gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/168038 +Reviewed-by: Rafi Kavungal Chundattu Parambil +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/tier/tier-heald.t | 35 ++++++++++++++++++++--------------- + 1 file changed, 20 insertions(+), 15 deletions(-) + +diff --git a/tests/basic/tier/tier-heald.t b/tests/basic/tier/tier-heald.t +index a8e634f..0ec9e43 100644 +--- a/tests/basic/tier/tier-heald.t ++++ b/tests/basic/tier/tier-heald.t +@@ -11,7 +11,7 @@ cleanup; + TEST glusterd + TEST pidof glusterd + +-volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol" ++r2_volfile=$(gluster system:: getwd)"/vols/r2/r2-shd.vol" + + # Commands should fail when both tiers are not of distribute type. + # Glustershd shouldn't be running as long as there are no replicate/disperse +@@ -34,51 +34,56 @@ TEST $CLI volume tier r2 attach $H0:$B0/r2_hot + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal r2 enable + EXPECT "enable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT "enable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal r2 disable + EXPECT "disable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT "disable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + # Commands should work on disperse volume. 
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2 + TEST $CLI volume start ec2 + ++ec2_volfile=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol" ++ + TEST $CLI volume tier ec2 attach replica 2 $H0:$B0/ec2_hot{1..4} + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal ec2 enable + EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT "enable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal ec2 disable + EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT "disable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + + #Check that shd graph is rewritten correctly on volume stop/start +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate + TEST $CLI volume stop r2 +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++ ++# Has been commented as the validations after stop using volfile dont hold true. ++#EXPECT "N" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate + TEST $CLI volume stop ec2 + # When both the volumes are stopped glustershd volfile is not modified just the + # process is stopped + TEST "[ -z $(get_shd_process_pid) ]" + + TEST $CLI volume start r2 +-EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++# Has been commented as the validations after stop using volfile dont hold true. 
++#EXPECT "N" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate + + TEST $CLI volume start ec2 + +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate + + TEST $CLI volume tier ec2 detach force + +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "N" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate + + TEST $CLI volume set r2 self-heal-daemon on + TEST $CLI volume set r2 cluster.self-heal-daemon off +-- +1.8.3.1 + diff --git a/SOURCES/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch b/SOURCES/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch new file mode 100644 index 0000000..90a25b3 --- /dev/null +++ b/SOURCES/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch @@ -0,0 +1,49 @@ +From 310e09d46cdb293e4af2df0085b8ac45d5c17933 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 16 Apr 2019 17:20:34 +0530 +Subject: [PATCH 111/124] glusterd: fix loading ctime in client graph logic + +Commit efbf8ab wasn't handling all the scenarios of toggling ctime +option correctly and more over a ! had completely tossed up the logic. + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22578/ + +>Fixes: bz#1697907 +>Change-Id: If12e2f69045e59878992ee2cd0518cc0eabcce0d +>Signed-off-by: Atin Mukherjee + +BUG: 1697820 +Change-Id: If12e2f69045e59878992ee2cd0518cc0eabcce0d +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/168048 +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 324ec2f..da877aa 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4358,9 +4358,15 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + goto out; + } + } +- +- if (conf->op_version >= GD_OP_VERSION_5_0 && +- !dict_get_str_boolean(set_dict, "features.ctime", _gf_false)) { ++ /* a. ret will be -1 if features.ctime is not set in the volinfo->dict which ++ * means ctime should be loaded into the graph. ++ * b. ret will be 1 if features.ctime is explicitly turned on through ++ * volume set and in that case ctime should be loaded into the graph. ++ * c. ret will be 0 if features.ctime is explicitly turned off and in that ++ * case ctime shouldn't be loaded into the graph. 
++ */ ++ ret = dict_get_str_boolean(set_dict, "features.ctime", -1); ++ if (conf->op_version >= GD_OP_VERSION_5_0 && ret) { + xl = volgen_graph_add(graph, "features/utime", volname); + if (!xl) { + ret = -1; +-- +1.8.3.1 + diff --git a/SOURCES/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch b/SOURCES/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch new file mode 100644 index 0000000..ddcb82c --- /dev/null +++ b/SOURCES/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch @@ -0,0 +1,45 @@ +From 1df830953b9a09404f9ca6a0539172e9f23ecbf4 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Wed, 17 Apr 2019 15:13:12 +0530 +Subject: [PATCH 112/124] geo-rep : fix incorrectly formatted authorized_keys + +Problem : While Geo-rep setup when creating an ssh authorized_keys + the geo-rep setup inserts an extra space before the "ssh-rsa" label. + This gets flagged by an enterprise customer's security scan as a + security violation. + +Solution: Remove extra space in GSYNCD_CMD & TAR_CMD. + +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22246/ +>Change-Id: I956f938faef0e0883703bbc337b1dc2770e4a921 +>fixes: bz#1679401 +>Signed-off-by: Sunny Kumar + +BUG: 1671862 +Change-Id: I194a2bddcf2ee9b8286b204f8c4da5c480a528b3 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/168144 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/src/peer_georep-sshkey.py.in | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/geo-replication/src/peer_georep-sshkey.py.in b/geo-replication/src/peer_georep-sshkey.py.in +index 2196fd7..58696e9 100644 +--- a/geo-replication/src/peer_georep-sshkey.py.in ++++ b/geo-replication/src/peer_georep-sshkey.py.in +@@ -30,8 +30,8 @@ from prettytable import PrettyTable + + SECRET_PEM = "@GLUSTERD_WORKDIR@/geo-replication/secret.pem" + TAR_SSH_PEM = "@GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem" +-GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" ' +-TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" ' ++GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" ' ++TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" ' + COMMON_SECRET_FILE = "@GLUSTERD_WORKDIR@/geo-replication/common_secret.pem.pub" + + +-- +1.8.3.1 + diff --git a/SOURCES/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch b/SOURCES/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch new file mode 100644 index 0000000..9f53a1e --- /dev/null +++ b/SOURCES/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch @@ -0,0 +1,71 @@ +From 850d5418fb48417d94ab17e565b2184ba951ccbe Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 17 Apr 2019 18:04:44 +0530 +Subject: [PATCH 113/124] spec: Glusterd did not start by default after node + reboot + +Problem: After install gluster rpms glusterd service is not enabled + so systemctl status is showing "disabled" + +Solution: Update glusterfs.spec.in to enable glusterd after install + gluster rpms + +label: DOWNSTREAM ONLY +BUG: 1699835 + +Change-Id: Ied9be5dfb1bf3bda24868722b1fbd77cb1c1d18c +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/168168 +Reviewed-by: Kaleb Keithley +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + glusterfs.spec.in | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index cb17eaa..ba095b7 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -168,6 +168,8 @@ 
+ %endif + + %if ( 0%{?_with_systemd:1} ) ++%global service_enable() /bin/systemctl --quiet enable %1.service || : \ ++%{nil} + %global service_start() /bin/systemctl --quiet start %1.service || : \ + %{nil} + %global service_stop() /bin/systemctl --quiet stop %1.service || :\ +@@ -181,7 +183,7 @@ + %global glustereventsd_svcfile %{_unitdir}/glustereventsd.service + %global glusterfssharedstorage_svcfile %{_unitdir}/glusterfssharedstorage.service + %else +-%global systemd_post() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \ ++%global service_enable() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \ + %{nil} + %global systemd_preun() /sbin/chkconfig --del %1 >/dev/null 2>&1 || : \ + %{nil} +@@ -926,7 +928,7 @@ exit 0 + + %if ( 0%{!?_without_events:1} ) + %post events +-%systemd_post glustereventsd ++%service_enable glustereventsd + %endif + + %if ( 0%{!?_without_server:1} ) +@@ -951,9 +953,9 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %post server + # Legacy server +-%systemd_post glusterd ++%service_enable glusterd + %if ( 0%{_for_fedora_koji_builds} ) +-%systemd_post glusterfsd ++%service_enable glusterfsd + %endif + # ".cmd_log_history" is renamed to "cmd_history.log" in GlusterFS-3.7 . + # While upgrading glusterfs-server package form GlusterFS version <= 3.6 to +-- +1.8.3.1 + diff --git a/SOURCES/0114-core-fix-hang-issue-in-__gf_free.patch b/SOURCES/0114-core-fix-hang-issue-in-__gf_free.patch new file mode 100644 index 0000000..7e26642 --- /dev/null +++ b/SOURCES/0114-core-fix-hang-issue-in-__gf_free.patch @@ -0,0 +1,46 @@ +From da53d9027d9426c0023176a42e0550d6ccccc941 Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Mon, 22 Apr 2019 21:18:30 +0530 +Subject: [PATCH 114/124] core: fix hang issue in __gf_free + +Currently GF_ASSERT is done under mem_accounting lock at some places. +On a GF_ASSERT failure, gf_msg_callingfn is called which calls gf_malloc +internally and it takes the same mem_accounting lock leading to deadlock. + +This is a temporary fix to avoid any hang issue in master. +https://review.gluster.org/#/c/glusterfs/+/22589/ is being worked on +in the mean while so that GF_ASSERT can be used under mem_accounting +lock. 
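+
+As an illustration only (hypothetical names; the real code uses
+mem_acct->rec[type].lock and gf_msg_callingfn), the deadlock pattern
+being avoided looks like this:
+
+    #include <pthread.h>
+    #include <stddef.h>
+
+    static pthread_mutex_t acct_lock = PTHREAD_MUTEX_INITIALIZER;
+
+    static void log_assert_failure(void)
+    {
+        /* Logging allocates memory, and memory accounting grabs
+         * acct_lock again. The mutex is not recursive, so this
+         * second lock never returns. */
+        pthread_mutex_lock(&acct_lock);
+        pthread_mutex_unlock(&acct_lock);
+    }
+
+    void account_free(size_t recorded, size_t freed)
+    {
+        pthread_mutex_lock(&acct_lock);
+        if (recorded < freed)
+            log_assert_failure(); /* the GF_ASSERT-style check fires
+                                   * while acct_lock is still held */
+        pthread_mutex_unlock(&acct_lock);
+    }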
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22600/ + +>Change-Id: I6d67f23979e7edd2695bdc6aab2997dae4a4060a +>updates: bz#1700865 +>Signed-off-by: Susant Palai + +Change-Id: I6d67f23979e7edd2695bdc6aab2997dae4a4060a +BUG: 1698728 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/168474 +Tested-by: RHGS Build Bot +Reviewed-by: Nithya Balachandran +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/mem-pool.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 81badc0..34cb87a 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -348,7 +348,6 @@ __gf_free(void *free_ptr) + + LOCK(&mem_acct->rec[header->type].lock); + { +- GF_ASSERT(mem_acct->rec[header->type].size >= header->size); + mem_acct->rec[header->type].size -= header->size; + mem_acct->rec[header->type].num_allocs--; + /* If all the instances are freed up then ensure typestr is set +-- +1.8.3.1 + diff --git a/SOURCES/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch b/SOURCES/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch new file mode 100644 index 0000000..b53ff91 --- /dev/null +++ b/SOURCES/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch @@ -0,0 +1,56 @@ +From 4901fcc0cc507accf30e1a4bdd020a5676488751 Mon Sep 17 00:00:00 2001 +From: Niels de Vos +Date: Mon, 8 Apr 2019 12:14:34 +0200 +Subject: [PATCH 115/124] core: only log seek errors if SEEK_HOLE/SEEK_DATA is + available + +On RHEL-6 there is no support for SEEK_HOLE/SEEK_DATA and this causes +the POSIX xlator to return errno=EINVAL. Because of this, the rpc-server +xlator will log all 'failed' seek attempts. When applications call +seek() often, the brick logs can grow very quickly and fill up the +disks. + +Messages that get logged are like +[server-rpc-fops.c:2091:server_seek_cbk] 0-vol01-server: 4947: SEEK-2 (53920aee-062c-4598-aa50-2b4d7821b204), client: worker.example.com-7808-2019/02/08-18:04:57:903430-vol01-client-0-0-0, error-xlator: vol01-posix [Invalid argument] + +The problem can be reproduced by running a Gluster Server on RHEL-6, +with a client running on RHEL-7. The client should execute an +application that calls lseek() with SEEK_HOLE/SEEK_DATA. 
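+
+For reference, a minimal sketch of the kind of client-side probing that
+triggers these messages (hypothetical helper; only the standard lseek()
+interface is assumed):
+
+    #define _GNU_SOURCE
+    #include <unistd.h>
+    #include <errno.h>
+
+    /* Probe for the next data region at or after 'pos'. On servers
+     * without SEEK_DATA support, lseek() fails with EINVAL; fall back
+     * to treating the whole range as data. ENXIO (no data after
+     * 'pos') is left for the caller to handle. */
+    off_t next_data(int fd, off_t pos)
+    {
+    #ifdef SEEK_DATA
+        off_t r = lseek(fd, pos, SEEK_DATA);
+        if (r == (off_t)-1 && errno == EINVAL)
+            return pos;
+        return r;
+    #else
+        return pos; /* platform has no SEEK_DATA at all */
+    #endif
+    }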
+
+>Change-Id: I7b6c16f8e0ba1a183e845cfdb8d5a3f8caeab138
+>Fixes: bz#1697316
+>Signed-off-by: Niels de Vos
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22526/
+
+BUG: 1696903
+Change-Id: I7b6c16f8e0ba1a183e845cfdb8d5a3f8caeab138
+Signed-off-by: Sunil Kumar Acharya
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168527
+Reviewed-by: Atin Mukherjee
+Tested-by: RHGS Build Bot
+---
+ libglusterfs/src/common-utils.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
+index a0c83c0..70d5d21 100644
+--- a/libglusterfs/src/common-utils.c
++++ b/libglusterfs/src/common-utils.c
+@@ -4500,9 +4500,13 @@ fop_log_level(glusterfs_fop_t fop, int op_errno)
+ return GF_LOG_DEBUG;
+
+ if (fop == GF_FOP_SEEK) {
++#ifdef HAVE_SEEK_HOLE
+ if (op_errno == ENXIO) {
+ return GF_LOG_DEBUG;
+ }
++#else
++ return GF_LOG_DEBUG;
++#endif
+ }
+
+ return GF_LOG_ERROR;
+--
+1.8.3.1
+
diff --git a/SOURCES/0116-cluster-ec-fix-fd-reopen.patch b/SOURCES/0116-cluster-ec-fix-fd-reopen.patch
new file mode 100644
index 0000000..5426c70
--- /dev/null
+++ b/SOURCES/0116-cluster-ec-fix-fd-reopen.patch
@@ -0,0 +1,1931 @@
+From e33b3e0a443d4a54634a664f2d499a3fce9e7fb4 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K
+Date: Tue, 16 Apr 2019 14:19:47 +0530
+Subject: [PATCH 116/124] cluster/ec: fix fd reopen
+
+Currently EC tries to reopen fd's that have been opened while a brick
+was down. This is done as part of regular write operations, just after
+having acquired the locks, and it's sent as a sub-fop of the main write
+fop.
+
+There were two problems:
+
+1. The reopen was attempted on all UP bricks, even if a previous lock
+didn't succeed. This is incorrect because the open will most probably
+fail.
+
+2. If the reopen is sent and fails, the error is propagated to the main
+operation, causing it to fail when it shouldn't.
+
+To fix this, we only attempt reopens on bricks where the current fop
+owns a lock, and we prevent any error from being propagated to the main
+fop.
+
+To implement this behaviour, the argument used to indicate the minimum
+number of required answers has been overloaded to also include some
+flags. To make the change consistent, it has been necessary to rename
+the argument, which means that a lot of files have been changed. However
+there are no functional changes.
+
+This change has also uncovered a problem in the discard code, which
+didn't correctly process requests of small sizes because no real discard
+fop was being processed, only a write of 0's on some region. In this
+case some fields of the fop remained uninitialized or with incorrect
+values. To fix this, a new function has been created to simulate success
+on a fop, and it's used in the discard case.
+
+Thanks to Pranith for providing a test script that has also detected an
+issue in this patch. This patch includes a small modification of this
+script to force data to be written into bricks before stopping them.
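+
+To make the overloading concrete, this is the encoding added to
+ec-common.h, condensed here as a standalone illustration:
+
+    #include <stdint.h>
+
+    #define EC_MINIMUM_ONE            (1 << 6) /* low byte: minimum */
+    #define EC_FOP_NO_PROPAGATE_ERROR (1 << 8) /* high bits: flags  */
+    #define EC_FOP_MINIMUM(_flags) ((_flags)&255)
+    #define EC_FOP_FLAGS(_flags) ((_flags) & ~255)
+
+    /* A reopen is now dispatched with both parts packed together: */
+    uint32_t fop_flags = EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR;
+    /* EC_FOP_MINIMUM(fop_flags) recovers the minimum-answers code,
+     * and EC_FOP_FLAGS(fop_flags) carries the flag that tells
+     * ec_resume_parent() not to propagate a sub-fop error. */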
+ +Upstream patch: https://review.gluster.org/22574 +> Change-Id: I7ccd1fc5fc134eeb6d443c755962a20819320d48 +> BUG: bz#1699866 +> Signed-off-by: Pranith Kumar K + +Upstream patch: https://review.gluster.org/22558 +> Change-Id: If272343873369186c2fb8f43c1d9c52c3ea304ec +> BUG: bz#1699866 +> Signed-off-by: Xavi Hernandez + +Change-Id: If272343873369186c2fb8f43c1d9c52c3ea304ec +Fixes: bz#1663375 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/168522 +Tested-by: RHGS Build Bot +Reviewed-by: Pranith Kumar Karampuri +--- + tests/basic/ec/self-heal-read-write-fail.t | 69 +++++++++++++ + tests/bugs/ec/bug-1699866-check-reopen-fd.t | 34 +++++++ + xlators/cluster/ec/src/ec-common.c | 73 ++++++++++---- + xlators/cluster/ec/src/ec-common.h | 14 ++- + xlators/cluster/ec/src/ec-data.c | 7 +- + xlators/cluster/ec/src/ec-data.h | 2 +- + xlators/cluster/ec/src/ec-dir-read.c | 12 +-- + xlators/cluster/ec/src/ec-dir-write.c | 52 +++++----- + xlators/cluster/ec/src/ec-fops.h | 144 ++++++++++++++-------------- + xlators/cluster/ec/src/ec-generic.c | 54 ++++++----- + xlators/cluster/ec/src/ec-heal.c | 20 ++-- + xlators/cluster/ec/src/ec-inode-read.c | 58 +++++------ + xlators/cluster/ec/src/ec-inode-write.c | 74 +++++++------- + xlators/cluster/ec/src/ec-locks.c | 36 +++---- + xlators/cluster/ec/src/ec-types.h | 11 ++- + xlators/cluster/ec/src/ec.c | 45 +++++---- + 16 files changed, 431 insertions(+), 274 deletions(-) + create mode 100644 tests/basic/ec/self-heal-read-write-fail.t + create mode 100644 tests/bugs/ec/bug-1699866-check-reopen-fd.t + +diff --git a/tests/basic/ec/self-heal-read-write-fail.t b/tests/basic/ec/self-heal-read-write-fail.t +new file mode 100644 +index 0000000..0ba591b +--- /dev/null ++++ b/tests/basic/ec/self-heal-read-write-fail.t +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++#This test verifies that self-heal fails when read/write fails as part of heal ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++TEST touch $M0/a ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++echo abc >> $M0/a ++ ++# Umount the volume to force all pending writes to reach the bricks ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++#Load error-gen and fail read fop and test that heal fails ++TEST $CLI volume stop $V0 #Stop volume so that error-gen can be loaded ++TEST $CLI volume set $V0 debug.error-gen posix ++TEST $CLI volume set $V0 debug.error-fops read ++TEST $CLI volume set $V0 debug.error-number EBADF ++TEST $CLI volume set $V0 debug.error-failure 100 ++ ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++TEST ! getfattr -n trusted.ec.heal $M0/a ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++ ++#fail write fop and test that heal fails ++TEST $CLI volume stop $V0 ++TEST $CLI volume set $V0 debug.error-fops write ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++TEST ! 
getfattr -n trusted.ec.heal $M0/a ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++ ++TEST $CLI volume stop $V0 #Stop volume so that error-gen can be disabled ++TEST $CLI volume reset $V0 debug.error-gen ++TEST $CLI volume reset $V0 debug.error-fops ++TEST $CLI volume reset $V0 debug.error-number ++TEST $CLI volume reset $V0 debug.error-failure ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++TEST getfattr -n trusted.ec.heal $M0/a ++EXPECT "^0$" get_pending_heal_count $V0 ++ ++#Test that heal worked as expected by forcing read from brick0 ++#remount to make sure data is not served from any cache ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++EXPECT "abc" cat $M0/a ++ ++cleanup +diff --git a/tests/bugs/ec/bug-1699866-check-reopen-fd.t b/tests/bugs/ec/bug-1699866-check-reopen-fd.t +new file mode 100644 +index 0000000..4386d01 +--- /dev/null ++++ b/tests/bugs/ec/bug-1699866-check-reopen-fd.t +@@ -0,0 +1,34 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fileio.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 disperse.background-heals 0 ++TEST $CLI volume set $V0 write-behind off ++TEST $CLI volume set $V0 open-behind off ++TEST $CLI volume start $V0 ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++ ++TEST mkdir -p $M0/dir ++ ++fd="$(fd_available)" ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0 ++ ++TEST fd_open ${fd} rw $M0/dir/test ++TEST fd_write ${fd} "test1" ++TEST $CLI volume replace-brick ${V0} $H0:$B0/${V0}0 $H0:$B0/${V0}0_1 commit force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++TEST fd_write ${fd} "test2" ++TEST fd_close ${fd} ++ ++cleanup +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 5183680..1454ae2 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -44,16 +44,16 @@ ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status) + UNLOCK(&fd->lock); + } + +-static int +-ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open) ++static uintptr_t ++ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t mask) + { + int i = 0; + int count = 0; + ec_t *ec = NULL; + ec_fd_t *fd_ctx = NULL; ++ uintptr_t need_open = 0; + + ec = this->private; +- *need_open = 0; + + fd_ctx = ec_fd_get(fd, this); + if (!fd_ctx) +@@ -63,9 +63,9 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open) + { + for (i = 0; i < ec->nodes; i++) { + if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) && +- (ec->xl_up & (1 << i))) { ++ ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) { + fd_ctx->fd_status[i] = EC_FD_OPENING; +- *need_open |= (1 << i); ++ need_open |= (1 << i); + count++; + } + } +@@ -76,10 +76,11 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open) + * then ignore fixing the fd as it has been + * requested from heal operation. 
+ */ +- if (count >= ec->fragments) +- count = 0; ++ if (count >= ec->fragments) { ++ need_open = 0; ++ } + +- return count; ++ return need_open; + } + + static gf_boolean_t +@@ -96,9 +97,8 @@ ec_is_fd_fixable(fd_t *fd) + } + + static void +-ec_fix_open(ec_fop_data_t *fop) ++ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) + { +- int call_count = 0; + uintptr_t need_open = 0; + int ret = 0; + loc_t loc = { +@@ -109,9 +109,10 @@ ec_fix_open(ec_fop_data_t *fop) + goto out; + + /* Evaluate how many remote fd's to be opened */ +- call_count = ec_fd_ctx_need_open(fop->fd, fop->xl, &need_open); +- if (!call_count) ++ need_open = ec_fd_ctx_need_open(fop->fd, fop->xl, mask); ++ if (need_open == 0) { + goto out; ++ } + + loc.inode = inode_ref(fop->fd->inode); + gf_uuid_copy(loc.gfid, fop->fd->inode->gfid); +@@ -121,11 +122,13 @@ ec_fix_open(ec_fop_data_t *fop) + } + + if (IA_IFDIR == fop->fd->inode->ia_type) { +- ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL, ++ ec_opendir(fop->frame, fop->xl, need_open, ++ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, + &fop->loc[0], fop->fd, NULL); + } else { +- ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL, +- &loc, fop->fd->flags, fop->fd, NULL); ++ ec_open(fop->frame, fop->xl, need_open, ++ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc, ++ fop->fd->flags, fop->fd, NULL); + } + + out: +@@ -495,12 +498,16 @@ ec_resume(ec_fop_data_t *fop, int32_t error) + } + + void +-ec_resume_parent(ec_fop_data_t *fop, int32_t error) ++ec_resume_parent(ec_fop_data_t *fop) + { + ec_fop_data_t *parent; ++ int32_t error = 0; + + parent = fop->parent; + if (parent != NULL) { ++ if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0) { ++ error = fop->error; ++ } + ec_trace("RESUME_PARENT", fop, "error=%u", error); + fop->parent = NULL; + ec_resume(parent, error); +@@ -593,6 +600,8 @@ ec_internal_op(ec_fop_data_t *fop) + return _gf_true; + if (fop->id == GF_FOP_FXATTROP) + return _gf_true; ++ if (fop->id == GF_FOP_OPEN) ++ return _gf_true; + return _gf_false; + } + +@@ -631,7 +640,7 @@ ec_msg_str(ec_fop_data_t *fop) + return fop->errstr; + } + +-int32_t ++static int32_t + ec_child_select(ec_fop_data_t *fop) + { + ec_t *ec = fop->xl->private; +@@ -693,8 +702,6 @@ ec_child_select(ec_fop_data_t *fop) + return 0; + } + +- ec_sleep(fop); +- + return 1; + } + +@@ -773,6 +780,8 @@ ec_dispatch_one(ec_fop_data_t *fop) + ec_dispatch_start(fop); + + if (ec_child_select(fop)) { ++ ec_sleep(fop); ++ + fop->expected = 1; + fop->first = ec_select_first_by_read_policy(fop->xl->private, fop); + +@@ -807,6 +816,8 @@ ec_dispatch_inc(ec_fop_data_t *fop) + ec_dispatch_start(fop); + + if (ec_child_select(fop)) { ++ ec_sleep(fop); ++ + fop->expected = gf_bits_count(fop->remaining); + fop->first = 0; + +@@ -820,6 +831,8 @@ ec_dispatch_all(ec_fop_data_t *fop) + ec_dispatch_start(fop); + + if (ec_child_select(fop)) { ++ ec_sleep(fop); ++ + fop->expected = gf_bits_count(fop->remaining); + fop->first = 0; + +@@ -838,6 +851,8 @@ ec_dispatch_min(ec_fop_data_t *fop) + ec_dispatch_start(fop); + + if (ec_child_select(fop)) { ++ ec_sleep(fop); ++ + fop->expected = count = ec->fragments; + fop->first = ec_select_first_by_read_policy(fop->xl->private, fop); + idx = fop->first - 1; +@@ -852,6 +867,23 @@ ec_dispatch_min(ec_fop_data_t *fop) + } + } + ++void ++ec_succeed_all(ec_fop_data_t *fop) ++{ ++ ec_dispatch_start(fop); ++ ++ if (ec_child_select(fop)) { ++ fop->expected = gf_bits_count(fop->remaining); ++ fop->first = 0; ++ ++ /* Simulate a 
successful execution on all bricks */ ++ ec_trace("SUCCEED", fop, ""); ++ ++ fop->good = fop->remaining; ++ fop->remaining = 0; ++ } ++} ++ + ec_lock_t * + ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc) + { +@@ -1825,7 +1857,8 @@ ec_lock_acquired(ec_lock_link_t *link) + + if (fop->use_fd && + (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) { +- ec_fix_open(fop); ++ /* Try to reopen closed fd's only if lock has succeeded. */ ++ ec_fix_open(fop, lock->mask); + } + + ec_lock_resume_shared(&list); +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index 54aaa77..e948342 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -54,9 +54,12 @@ enum _ec_xattrop_flags { + + #define EC_SELFHEAL_BIT 62 + +-#define EC_MINIMUM_ONE -1 +-#define EC_MINIMUM_MIN -2 +-#define EC_MINIMUM_ALL -3 ++#define EC_MINIMUM_ONE (1 << 6) ++#define EC_MINIMUM_MIN (2 << 6) ++#define EC_MINIMUM_ALL (3 << 6) ++#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8) ++#define EC_FOP_MINIMUM(_flags) ((_flags)&255) ++#define EC_FOP_FLAGS(_flags) ((_flags) & ~255) + + #define EC_UPDATE_DATA 1 + #define EC_UPDATE_META 2 +@@ -163,11 +166,14 @@ void + ec_dispatch_one(ec_fop_data_t *fop); + + void ++ec_succeed_all(ec_fop_data_t *fop); ++ ++void + ec_sleep(ec_fop_data_t *fop); + void + ec_resume(ec_fop_data_t *fop, int32_t error); + void +-ec_resume_parent(ec_fop_data_t *fop, int32_t error); ++ec_resume_parent(ec_fop_data_t *fop); + + void + ec_manager(ec_fop_data_t *fop, int32_t error); +diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c +index fae8843..6ef9340 100644 +--- a/xlators/cluster/ec/src/ec-data.c ++++ b/xlators/cluster/ec/src/ec-data.c +@@ -98,7 +98,7 @@ ec_cbk_data_destroy(ec_cbk_data_t *cbk) + + ec_fop_data_t * + ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id, +- uint32_t flags, uintptr_t target, int32_t minimum, ++ uint32_t flags, uintptr_t target, uint32_t fop_flags, + ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks, + void *data) + { +@@ -151,7 +151,8 @@ ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id, + fop->refs = 1; + + fop->flags = flags; +- fop->minimum = minimum; ++ fop->minimum = EC_FOP_MINIMUM(fop_flags); ++ fop->fop_flags = EC_FOP_FLAGS(fop_flags); + fop->mask = target; + + fop->wind = wind; +@@ -271,7 +272,7 @@ ec_fop_data_release(ec_fop_data_t *fop) + loc_wipe(&fop->loc[1]); + GF_FREE(fop->errstr); + +- ec_resume_parent(fop, fop->error); ++ ec_resume_parent(fop); + + ec_fop_cleanup(fop); + +diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h +index 112536d..c8a74ff 100644 +--- a/xlators/cluster/ec/src/ec-data.h ++++ b/xlators/cluster/ec/src/ec-data.h +@@ -18,7 +18,7 @@ ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop, + int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno); + ec_fop_data_t * + ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id, +- uint32_t flags, uintptr_t target, int32_t minimum, ++ uint32_t flags, uintptr_t target, uint32_t fop_flags, + ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks, + void *data); + void +diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c +index c9db701..8310d4a 100644 +--- a/xlators/cluster/ec/src/ec-dir-read.c ++++ b/xlators/cluster/ec/src/ec-dir-read.c +@@ -219,7 +219,7 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state) + + void + ec_opendir(call_frame_t *frame, xlator_t *this, 
uintptr_t target, +- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc, + fd_t *fd, dict_t *xdata) + { + ec_cbk_t callback = {.opendir = func}; +@@ -233,7 +233,7 @@ ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_opendir, ++ target, fop_flags, ec_wind_opendir, + ec_manager_opendir, callback, data); + if (fop == NULL) { + goto out; +@@ -515,7 +515,7 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state) + + void + ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) + { + ec_cbk_t callback = {.readdir = func}; +@@ -529,7 +529,7 @@ ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_readdir, ++ target, fop_flags, ec_wind_readdir, + ec_manager_readdir, callback, data); + if (fop == NULL) { + goto out; +@@ -585,7 +585,7 @@ ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) + { + ec_cbk_t callback = {.readdirp = func}; +@@ -599,7 +599,7 @@ ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate( +- frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, minimum, ++ frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, fop_flags, + ec_wind_readdirp, ec_manager_readdir, callback, data); + if (fop == NULL) { + goto out; +diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c +index e24667f..0b8ee21 100644 +--- a/xlators/cluster/ec/src/ec-dir-write.c ++++ b/xlators/cluster/ec/src/ec-dir-write.c +@@ -262,7 +262,7 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state) + + void + ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) + { + ec_cbk_t callback = {.create = func}; +@@ -275,7 +275,7 @@ ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, fop_flags, + ec_wind_create, ec_manager_create, callback, + data); + if (fop == NULL) { +@@ -432,9 +432,9 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) ++ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_link_cbk_t func, void *data, 
loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) + { + ec_cbk_t callback = {.link = func}; + ec_fop_data_t *fop = NULL; +@@ -446,7 +446,7 @@ ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, fop_flags, + ec_wind_link, ec_manager_link, callback, data); + if (fop == NULL) { + goto out; +@@ -613,9 +613,9 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode, +- mode_t umask, dict_t *xdata) ++ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc, ++ mode_t mode, mode_t umask, dict_t *xdata) + { + ec_cbk_t callback = {.mkdir = func}; + ec_fop_data_t *fop = NULL; +@@ -627,7 +627,7 @@ ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, fop_flags, + ec_wind_mkdir, ec_manager_mkdir, callback, data); + if (fop == NULL) { + goto out; +@@ -815,9 +815,9 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev, +- mode_t umask, dict_t *xdata) ++ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc, ++ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) + { + ec_cbk_t callback = {.mknod = func}; + ec_fop_data_t *fop = NULL; +@@ -829,7 +829,7 @@ ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, fop_flags, + ec_wind_mknod, ec_manager_mknod, callback, data); + if (fop == NULL) { + goto out; +@@ -975,7 +975,7 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state) + + void + ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc, ++ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) + { + ec_cbk_t callback = {.rename = func}; +@@ -988,7 +988,7 @@ ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, fop_flags, + ec_wind_rename, ec_manager_rename, callback, + data); + if (fop == NULL) { +@@ -1125,9 +1125,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags, +- dict_t *xdata) 
++ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc, ++ int xflags, dict_t *xdata) + { + ec_cbk_t callback = {.rmdir = func}; + ec_fop_data_t *fop = NULL; +@@ -1139,7 +1139,7 @@ ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, fop_flags, + ec_wind_rmdir, ec_manager_rmdir, callback, data); + if (fop == NULL) { + goto out; +@@ -1281,7 +1281,7 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state) + + void + ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_symlink_cbk_t func, void *data, ++ uint32_t fop_flags, fop_symlink_cbk_t func, void *data, + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) + { + ec_cbk_t callback = {.symlink = func}; +@@ -1294,9 +1294,9 @@ ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, minimum, +- ec_wind_symlink, ec_manager_symlink, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, ++ fop_flags, ec_wind_symlink, ec_manager_symlink, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1435,7 +1435,7 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state) + + void + ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc, + int xflags, dict_t *xdata) + { + ec_cbk_t callback = {.unlink = func}; +@@ -1448,7 +1448,7 @@ ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, fop_flags, + ec_wind_unlink, ec_manager_unlink, callback, + data); + if (fop == NULL) { +diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h +index 2abef0d..07edf8a 100644 +--- a/xlators/cluster/ec/src/ec-fops.h ++++ b/xlators/cluster/ec/src/ec-fops.h +@@ -18,233 +18,237 @@ + + void + ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc, + int32_t mask, dict_t *xdata); + + void + ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + + void + ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_entrylk_cbk_t func, void *data, ++ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata); + + void + ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fentrylk_cbk_t func, void *data, ++ 
uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data, + const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata); + + void +-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata); ++ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd, ++ dict_t *xdata); + + void +-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync, +- dict_t *xdata); ++ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd, ++ int32_t datasync, dict_t *xdata); + + void + ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, + int32_t datasync, dict_t *xdata); + + void + ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc, + const char *name, dict_t *xdata); + + void + ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, + const char *name, dict_t *xdata); + + void +-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial, +- dict_t *xdata); ++ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc, ++ int32_t partial, dict_t *xdata); + + void +-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial, +- dict_t *xdata); ++ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd, ++ int32_t partial, dict_t *xdata); + + void + ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, ++ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func, + void *data, const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock, dict_t *xdata); + + void + ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, ++ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func, + void *data, const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata); + + void +-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata); ++ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata); + + void +-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, ++ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags, + fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata); + + void + ec_lookup(call_frame_t *frame, xlator_t 
*this, uintptr_t target, +- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc, + dict_t *xdata); + + void +-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode, +- mode_t umask, dict_t *xdata); ++ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc, ++ mode_t mode, mode_t umask, dict_t *xdata); + + void +-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev, +- mode_t umask, dict_t *xdata); ++ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc, ++ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata); + + void +-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd, +- dict_t *xdata); ++ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc, ++ int32_t flags, fd_t *fd, dict_t *xdata); + + void + ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc, + fd_t *fd, dict_t *xdata); + + void + ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata); + + void + ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata); + + void + ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc, + size_t size, dict_t *xdata); + + void +-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset, +- uint32_t flags, dict_t *xdata); ++ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd, ++ size_t size, off_t offset, uint32_t flags, dict_t *xdata); + + void + ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_removexattr_cbk_t func, void *data, ++ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data, + loc_t *loc, const char *name, dict_t *xdata); + + void + ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fremovexattr_cbk_t func, void *data, ++ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data, + fd_t *fd, const char *name, dict_t *xdata); + + void + ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc, ++ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc, + loc_t *newloc, dict_t *xdata); + + void +-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t 
minimum, +- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags, +- dict_t *xdata); ++ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc, ++ int xflags, dict_t *xdata); + + void + ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + + void + ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + + void + ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata); + + void + ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, + dict_t *dict, int32_t flags, dict_t *xdata); + + void +-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata); ++ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc, ++ dict_t *xdata); + + void +-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata); ++ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd, ++ dict_t *xdata); + + void + ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc, + dict_t *xdata); + + void + ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_symlink_cbk_t func, void *data, ++ uint32_t fop_flags, fop_symlink_cbk_t func, void *data, + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata); + + void + ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata); + + void + ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd, + off_t offset, size_t len, dict_t *xdata); + + void + ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc, + off_t offset, dict_t *xdata); + + void + ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd, + off_t offset, dict_t *xdata); + + void + ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, 
fop_unlink_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc, + int xflags, dict_t *xdata); + + void + ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata); + + void + ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata); + + void + ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata); + + void +-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata); ++ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd, ++ off_t offset, gf_seek_what_t what, dict_t *xdata); + + void +-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata); ++ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op, ++ dict_t *xdata); + + #endif /* __EC_FOPS_H__ */ +diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c +index 175e88a..acc16b5 100644 +--- a/xlators/cluster/ec/src/ec-generic.c ++++ b/xlators/cluster/ec/src/ec-generic.c +@@ -151,8 +151,9 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata) ++ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd, ++ dict_t *xdata) + { + ec_cbk_t callback = {.flush = func}; + ec_fop_data_t *fop = NULL; +@@ -164,7 +165,7 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags, + ec_wind_flush, ec_manager_flush, callback, data); + if (fop == NULL) { + goto out; +@@ -366,9 +367,9 @@ ec_manager_fsync(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync, +- dict_t *xdata) ++ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd, ++ int32_t datasync, dict_t *xdata) + { + ec_cbk_t callback = {.fsync = func}; + ec_fop_data_t *fop = NULL; +@@ -380,7 +381,7 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = 
ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags, + ec_wind_fsync, ec_manager_fsync, callback, data); + if (fop == NULL) { + goto out; +@@ -553,7 +554,7 @@ ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state) + + void + ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, + int32_t datasync, dict_t *xdata) + { + ec_cbk_t callback = {.fsyncdir = func}; +@@ -566,9 +567,9 @@ ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, minimum, +- ec_wind_fsyncdir, ec_manager_fsyncdir, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, ++ fop_flags, ec_wind_fsyncdir, ec_manager_fsyncdir, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -848,7 +849,7 @@ ec_manager_lookup(ec_fop_data_t *fop, int32_t state) + + void + ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc, + dict_t *xdata) + { + ec_cbk_t callback = {.lookup = func}; +@@ -862,7 +863,7 @@ ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_lookup, ++ target, fop_flags, ec_wind_lookup, + ec_manager_lookup, callback, data); + if (fop == NULL) { + goto out; +@@ -1033,7 +1034,7 @@ ec_manager_statfs(ec_fop_data_t *fop, int32_t state) + + void + ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc, + dict_t *xdata) + { + ec_cbk_t callback = {.statfs = func}; +@@ -1047,7 +1048,7 @@ ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_statfs, ++ target, fop_flags, ec_wind_statfs, + ec_manager_statfs, callback, data); + if (fop == NULL) { + goto out; +@@ -1270,7 +1271,7 @@ ec_manager_xattrop(ec_fop_data_t *fop, int32_t state) + + void + ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { + ec_cbk_t callback = {.xattrop = func}; +@@ -1283,9 +1284,9 @@ ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, minimum, +- ec_wind_xattrop, ec_manager_xattrop, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, ++ fop_flags, ec_wind_xattrop, ec_manager_xattrop, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1343,7 +1344,7 @@ ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + 
ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { + ec_cbk_t callback = {.fxattrop = func}; +@@ -1356,9 +1357,9 @@ ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, minimum, +- ec_wind_fxattrop, ec_manager_xattrop, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, ++ fop_flags, ec_wind_fxattrop, ec_manager_xattrop, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1507,8 +1508,9 @@ ec_manager_ipc(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata) ++ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op, ++ dict_t *xdata) + { + ec_cbk_t callback = {.ipc = func}; + ec_fop_data_t *fop = NULL; +@@ -1520,7 +1522,7 @@ ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, fop_flags, + ec_wind_ipc, ec_manager_ipc, callback, data); + if (fop == NULL) { + goto out; +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 1ca12c1..3aa04fb 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -367,16 +367,16 @@ ec_heal_data_block(ec_heal_t *heal) + /* FOP: fheal */ + + void +-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial, +- dict_t *xdata) ++ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd, ++ int32_t partial, dict_t *xdata) + { + ec_fd_t *ctx = ec_fd_get(fd, this); + + if (ctx != NULL) { + gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags, + ctx->open); +- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial, ++ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial, + xdata); + } + } +@@ -1975,7 +1975,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) + /*Takes lock */ + void + ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal) ++ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal) + { + ec_cbk_t callback = {.heal = func}; + ec_fop_data_t *fop = NULL; +@@ -1986,7 +1986,7 @@ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target, + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags, + NULL, ec_manager_heal_block, callback, heal); + if (fop == NULL) + goto out; +@@ -2761,9 +2761,9 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + } + + void +-ec_heal(call_frame_t *frame, xlator_t 
*this, uintptr_t target, int32_t minimum, +- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial, +- dict_t *xdata) ++ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc, ++ int32_t partial, dict_t *xdata) + { + ec_cbk_t callback = {.heal = func}; + ec_fop_data_t *fop = NULL; +@@ -2779,7 +2779,7 @@ ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + + if (frame && frame->local) + goto fail; +- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags, + NULL, NULL, callback, data); + + err = ENOMEM; +diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c +index 55e5934..f87a94a 100644 +--- a/xlators/cluster/ec/src/ec-inode-read.c ++++ b/xlators/cluster/ec/src/ec-inode-read.c +@@ -135,7 +135,7 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state) + + void + ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc, + int32_t mask, dict_t *xdata) + { + ec_cbk_t callback = {.access = func}; +@@ -149,7 +149,7 @@ ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_access, ++ target, fop_flags, ec_wind_access, + ec_manager_access, callback, data); + if (fop == NULL) { + goto out; +@@ -446,7 +446,7 @@ out: + + void + ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc, + const char *name, dict_t *xdata) + { + ec_cbk_t callback = {.getxattr = func}; +@@ -468,7 +468,7 @@ ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + } + + fop = ec_fop_data_allocate( +- frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, minimum, ++ frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags, + ec_wind_getxattr, ec_manager_getxattr, callback, data); + if (fop == NULL) { + goto out; +@@ -588,7 +588,7 @@ ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, + const char *name, dict_t *xdata) + { + ec_cbk_t callback = {.fgetxattr = func}; +@@ -602,7 +602,7 @@ ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate( +- frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, minimum, ++ frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags, + ec_wind_fgetxattr, ec_manager_getxattr, callback, data); + if (fop == NULL) { + goto out; +@@ -869,9 +869,9 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd, +- dict_t *xdata) ++ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc, ++ int32_t flags, fd_t 
*fd, dict_t *xdata) + { + ec_cbk_t callback = {.open = func}; + ec_fop_data_t *fop = NULL; +@@ -884,7 +884,7 @@ ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_open, ec_manager_open, ++ target, fop_flags, ec_wind_open, ec_manager_open, + callback, data); + if (fop == NULL) { + goto out; +@@ -1071,7 +1071,7 @@ ec_manager_readlink(ec_fop_data_t *fop, int32_t state) + + void + ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc, + size_t size, dict_t *xdata) + { + ec_cbk_t callback = {.readlink = func}; +@@ -1085,7 +1085,7 @@ ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate( +- frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, minimum, ++ frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, fop_flags, + ec_wind_readlink, ec_manager_readlink, callback, data); + if (fop == NULL) { + goto out; +@@ -1417,9 +1417,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset, +- uint32_t flags, dict_t *xdata) ++ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd, ++ size_t size, off_t offset, uint32_t flags, dict_t *xdata) + { + ec_cbk_t callback = {.readv = func}; + ec_fop_data_t *fop = NULL; +@@ -1432,8 +1432,8 @@ ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_readv, ec_manager_readv, +- callback, data); ++ target, fop_flags, ec_wind_readv, ++ ec_manager_readv, callback, data); + if (fop == NULL) { + goto out; + } +@@ -1637,9 +1637,9 @@ ec_manager_seek(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata) ++ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd, ++ off_t offset, gf_seek_what_t what, dict_t *xdata) + { + ec_cbk_t callback = {.seek = func}; + ec_fop_data_t *fop = NULL; +@@ -1652,7 +1652,7 @@ ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_seek, ec_manager_seek, ++ target, fop_flags, ec_wind_seek, ec_manager_seek, + callback, data); + if (fop == NULL) { + goto out; +@@ -1855,8 +1855,9 @@ ec_manager_stat(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata) ++ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc, ++ dict_t *xdata) + { + ec_cbk_t callback 
= {.stat = func}; + ec_fop_data_t *fop = NULL; +@@ -1869,7 +1870,7 @@ ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_stat, ec_manager_stat, ++ target, fop_flags, ec_wind_stat, ec_manager_stat, + callback, data); + if (fop == NULL) { + goto out; +@@ -1965,8 +1966,9 @@ ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + } + + void +-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata) ++ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd, ++ dict_t *xdata) + { + ec_cbk_t callback = {.fstat = func}; + ec_fop_data_t *fop = NULL; +@@ -1979,8 +1981,8 @@ ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_fstat, ec_manager_stat, +- callback, data); ++ target, fop_flags, ec_wind_fstat, ++ ec_manager_stat, callback, data); + if (fop == NULL) { + goto out; + } +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index e7b34e6..a903664 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -281,7 +281,7 @@ ec_manager_xattr(ec_fop_data_t *fop, int32_t state) + + void + ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_removexattr_cbk_t func, void *data, ++ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data, + loc_t *loc, const char *name, dict_t *xdata) + { + ec_cbk_t callback = {.removexattr = func}; +@@ -295,7 +295,7 @@ ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target, +- minimum, ec_wind_removexattr, ec_manager_xattr, ++ fop_flags, ec_wind_removexattr, ec_manager_xattr, + callback, data); + if (fop == NULL) { + goto out; +@@ -361,7 +361,7 @@ ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fremovexattr_cbk_t func, void *data, ++ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data, + fd_t *fd, const char *name, dict_t *xdata) + { + ec_cbk_t callback = {.fremovexattr = func}; +@@ -375,8 +375,8 @@ ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target, +- minimum, ec_wind_fremovexattr, ec_manager_xattr, +- callback, data); ++ fop_flags, ec_wind_fremovexattr, ++ ec_manager_xattr, callback, data); + if (fop == NULL) { + goto out; + } +@@ -550,7 +550,7 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state) + + void + ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) + { + ec_cbk_t callback = {.setattr = func}; +@@ -563,9 +563,9 @@ ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t 
target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, minimum, +- ec_wind_setattr, ec_manager_setattr, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, ++ fop_flags, ec_wind_setattr, ec_manager_setattr, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -627,7 +627,7 @@ ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) + { + ec_cbk_t callback = {.fsetattr = func}; +@@ -640,9 +640,9 @@ ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, minimum, +- ec_wind_fsetattr, ec_manager_setattr, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, ++ fop_flags, ec_wind_fsetattr, ec_manager_setattr, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -707,7 +707,7 @@ ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) + { + ec_cbk_t callback = {.setxattr = func}; +@@ -720,9 +720,9 @@ ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, minimum, +- ec_wind_setxattr, ec_manager_xattr, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, ++ fop_flags, ec_wind_setxattr, ec_manager_xattr, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -825,7 +825,7 @@ ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, + dict_t *dict, int32_t flags, dict_t *xdata) + { + ec_cbk_t callback = {.fsetxattr = func}; +@@ -839,7 +839,7 @@ ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target, +- minimum, ec_wind_fsetxattr, ec_manager_xattr, ++ fop_flags, ec_wind_fsetxattr, ec_manager_xattr, + callback, data); + if (fop == NULL) { + goto out; +@@ -1035,7 +1035,7 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state) + + void + ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata) + { + ec_cbk_t callback = {.fallocate = func}; +@@ -1049,8 +1049,8 @@ ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop 
= ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target, +- minimum, ec_wind_fallocate, ec_manager_fallocate, +- callback, data); ++ fop_flags, ec_wind_fallocate, ++ ec_manager_fallocate, callback, data); + if (fop == NULL) { + goto out; + } +@@ -1209,8 +1209,8 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state) + ec_dispatch_all(fop); + return EC_STATE_DELAYED_START; + } else { +- /*Assume discard to have succeeded on mask*/ +- fop->good = fop->mask; ++ /* Assume discard to have succeeded on all bricks */ ++ ec_succeed_all(fop); + } + + /* Fall through */ +@@ -1289,7 +1289,7 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state) + + void + ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd, + off_t offset, size_t len, dict_t *xdata) + { + ec_cbk_t callback = {.discard = func}; +@@ -1302,9 +1302,9 @@ ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, minimum, +- ec_wind_discard, ec_manager_discard, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, ++ fop_flags, ec_wind_discard, ec_manager_discard, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1530,7 +1530,7 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state) + + void + ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc, + off_t offset, dict_t *xdata) + { + ec_cbk_t callback = {.truncate = func}; +@@ -1543,9 +1543,9 @@ ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, minimum, +- ec_wind_truncate, ec_manager_truncate, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, ++ fop_flags, ec_wind_truncate, ec_manager_truncate, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1604,7 +1604,7 @@ ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd, + off_t offset, dict_t *xdata) + { + ec_cbk_t callback = {.ftruncate = func}; +@@ -1618,8 +1618,8 @@ ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target, +- minimum, ec_wind_ftruncate, ec_manager_truncate, +- callback, data); ++ fop_flags, ec_wind_ftruncate, ++ ec_manager_truncate, callback, data); + if (fop == NULL) { + goto out; + } +@@ -2262,7 +2262,7 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state) + + void + ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) + { +@@ -2276,7 +2276,7 
@@ ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags, + ec_wind_writev, ec_manager_writev, callback, + data); + if (fop == NULL) { +diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c +index f978af0..ffcac07 100644 +--- a/xlators/cluster/ec/src/ec-locks.c ++++ b/xlators/cluster/ec/src/ec-locks.c +@@ -275,7 +275,7 @@ ec_manager_entrylk(ec_fop_data_t *fop, int32_t state) + + void + ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_entrylk_cbk_t func, void *data, ++ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) + { +@@ -288,9 +288,9 @@ ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, minimum, +- ec_wind_entrylk, ec_manager_entrylk, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, ++ fop_flags, ec_wind_entrylk, ec_manager_entrylk, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -403,7 +403,7 @@ ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fentrylk_cbk_t func, void *data, ++ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data, + const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) + { +@@ -416,9 +416,9 @@ ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, minimum, +- ec_wind_fentrylk, ec_manager_entrylk, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, ++ fop_flags, ec_wind_fentrylk, ec_manager_entrylk, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -650,7 +650,7 @@ ec_manager_inodelk(ec_fop_data_t *fop, int32_t state) + + void + ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, ++ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func, + void *data, const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +@@ -664,9 +664,9 @@ ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, minimum, +- ec_wind_inodelk, ec_manager_inodelk, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, ++ fop_flags, ec_wind_inodelk, ec_manager_inodelk, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -782,7 +782,7 @@ ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, ++ uintptr_t target, uint32_t fop_flags, 
fop_finodelk_cbk_t func, + void *data, const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +@@ -796,9 +796,9 @@ ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, minimum, +- ec_wind_finodelk, ec_manager_inodelk, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, ++ fop_flags, ec_wind_finodelk, ec_manager_inodelk, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1032,7 +1032,7 @@ ec_manager_lk(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, ++ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags, + fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +@@ -1045,7 +1045,7 @@ ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, fop_flags, + ec_wind_lk, ec_manager_lk, callback, data); + if (fop == NULL) { + goto out; +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 6ae4a2b..1c295c0 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -308,9 +308,9 @@ struct _ec_fop_data { + int32_t id; /* ID of the file operation */ + int32_t refs; + int32_t state; +- int32_t minimum; /* Minimum number of successful +- operation required to conclude a +- fop as successful */ ++ uint32_t minimum; /* Minimum number of successful ++ operation required to conclude a ++ fop as successful */ + int32_t expected; + int32_t winds; + int32_t jobs; +@@ -325,11 +325,12 @@ struct _ec_fop_data { + ec_cbk_data_t *answer; /* accepted answer */ + int32_t lock_count; + int32_t locked; ++ gf_lock_t lock; + ec_lock_link_t locks[2]; + int32_t first_lock; +- gf_lock_t lock; + +- uint32_t flags; ++ uint32_t fop_flags; /* Flags passed by the caller. */ ++ uint32_t flags; /* Internal flags. 
*/ + uint32_t first; + uintptr_t mask; + uintptr_t healing; /*Dispatch is done but call is successful only +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 13ffeb9..3c8013e 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -797,11 +797,12 @@ ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ uint32_t fop_flags = EC_MINIMUM_ALL; ++ + if (cmd == ENTRYLK_UNLOCK) +- minimum = EC_MINIMUM_ONE; +- ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL, volume, loc, +- basename, cmd, type, xdata); ++ fop_flags = EC_MINIMUM_ONE; ++ ec_entrylk(frame, this, -1, fop_flags, default_entrylk_cbk, NULL, volume, ++ loc, basename, cmd, type, xdata); + + return 0; + } +@@ -811,10 +812,11 @@ ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ uint32_t fop_flags = EC_MINIMUM_ALL; ++ + if (cmd == ENTRYLK_UNLOCK) +- minimum = EC_MINIMUM_ONE; +- ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL, volume, ++ fop_flags = EC_MINIMUM_ONE; ++ ec_fentrylk(frame, this, -1, fop_flags, default_fentrylk_cbk, NULL, volume, + fd, basename, cmd, type, xdata); + + return 0; +@@ -905,7 +907,7 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + { + int error = 0; + ec_t *ec = this->private; +- int32_t minimum = EC_MINIMUM_ONE; ++ int32_t fop_flags = EC_MINIMUM_ONE; + + if (name && strcmp(name, EC_XATTR_HEAL) != 0) { + EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out); +@@ -920,11 +922,11 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + + if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) || + XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) { +- minimum = EC_MINIMUM_ALL; ++ fop_flags = EC_MINIMUM_ALL; + } + +- ec_getxattr(frame, this, -1, minimum, default_getxattr_cbk, NULL, loc, name, +- xdata); ++ ec_getxattr(frame, this, -1, fop_flags, default_getxattr_cbk, NULL, loc, ++ name, xdata); + + return 0; + out: +@@ -954,11 +956,12 @@ int32_t + ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ int32_t fop_flags = EC_MINIMUM_ALL; ++ + if (flock->l_type == F_UNLCK) +- minimum = EC_MINIMUM_ONE; ++ fop_flags = EC_MINIMUM_ONE; + +- ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum, ++ ec_inodelk(frame, this, &frame->root->lk_owner, -1, fop_flags, + default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata); + + return 0; +@@ -968,10 +971,11 @@ int32_t + ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ int32_t fop_flags = EC_MINIMUM_ALL; ++ + if (flock->l_type == F_UNLCK) +- minimum = EC_MINIMUM_ONE; +- ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum, ++ fop_flags = EC_MINIMUM_ONE; ++ ec_finodelk(frame, this, &frame->root->lk_owner, -1, fop_flags, + default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata); + + return 0; +@@ -991,10 +995,11 @@ int32_t + ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ int32_t fop_flags 
= EC_MINIMUM_ALL; ++ + if (flock->l_type == F_UNLCK) +- minimum = EC_MINIMUM_ONE; +- ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd, flock, ++ fop_flags = EC_MINIMUM_ONE; ++ ec_lk(frame, this, -1, fop_flags, default_lk_cbk, NULL, fd, cmd, flock, + xdata); + + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0117-spec-Remove-thin-arbiter-package.patch b/SOURCES/0117-spec-Remove-thin-arbiter-package.patch new file mode 100644 index 0000000..47fbffc --- /dev/null +++ b/SOURCES/0117-spec-Remove-thin-arbiter-package.patch @@ -0,0 +1,184 @@ +From 70842c77735a655a053ed4a7cb77fec01028355a Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya +Date: Mon, 22 Apr 2019 12:48:13 +0530 +Subject: [PATCH 117/124] spec: Remove thin-arbiter package + +Thin-arbiter is not supported in downstream. Updated the +code to avoid RPMdiff warnings. Marked thin-arbiter +test cases as bad to avoid nightly runs from reporting +expected failures. + +Label: DOWNSTREAM ONLY + +BUG: 1698436 +Change-Id: Ic36bccdfe1c7039fb7e5ce078a8b64cf71056970 +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/168406 +Tested-by: RHGS Build Bot +--- + configure.ac | 2 -- + extras/systemd/Makefile.am | 6 ++--- + glusterfs.spec.in | 39 +++++---------------------------- + tests/basic/afr/ta-check-locks.t | 2 ++ + tests/basic/afr/ta-read.t | 2 ++ + tests/basic/afr/ta-shd.t | 2 ++ + tests/basic/afr/ta-write-on-bad-brick.t | 2 ++ + xlators/features/Makefile.am | 2 +- + 8 files changed, 18 insertions(+), 39 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 521671b..3065077 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -116,8 +116,6 @@ AC_CONFIG_FILES([Makefile + xlators/features/Makefile + xlators/features/arbiter/Makefile + xlators/features/arbiter/src/Makefile +- xlators/features/thin-arbiter/Makefile +- xlators/features/thin-arbiter/src/Makefile + xlators/features/changelog/Makefile + xlators/features/changelog/src/Makefile + xlators/features/changelog/lib/Makefile +diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am +index 61446a9..b849775 100644 +--- a/extras/systemd/Makefile.am ++++ b/extras/systemd/Makefile.am +@@ -1,8 +1,8 @@ +-CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service gluster-ta-volume.service +-EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in gluster-ta-volume.service.in ++CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service ++EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in + + if USE_SYSTEMD +-systemd_DATA = gluster-ta-volume.service ++systemd_DATA = + endif + + if WITH_SERVER +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ba095b7..bf72a55 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -682,18 +682,6 @@ is in user space and easily manageable. + This package provides the glusterfs server daemon. + %endif + +-%package thin-arbiter +-Summary: GlusterFS thin-arbiter module +-Requires: %{name}%{?_isa} = %{version}-%{release} +-Requires: %{name}-server%{?_isa} = %{version}-%{release} +- +-%description thin-arbiter +-This package provides a tie-breaker functionality to GlusterFS +-replicate volume. It includes translators required to provide the +-functionality, and also few other scripts required for getting the setup done. +- +-This package provides the glusterfs thin-arbiter translator. 
+- + %package client-xlators + Summary: GlusterFS client-side translators + Requires: %{name}-libs%{?_isa} = %{version}-%{release} +@@ -1045,14 +1033,6 @@ fi + exit 0 + %endif + +-%preun thin-arbiter +-if [ $1 -eq 0 ]; then +- if [ -f %glusterta_svcfile ]; then +- %service_stop gluster-ta-volume +- %systemd_preun gluster-ta-volume +- fi +-fi +- + ##----------------------------------------------------------------------------- + ## All %%postun should be placed here and keep them sorted + ## +@@ -1188,6 +1168,12 @@ exit 0 + %exclude %{_prefix}/lib/ocf/resource.d/heartbeat/* + %endif + ++%exclude %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh ++ ++%if ( 0%{?_without_server:1} ) ++%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol ++%endif ++ + %files api + %exclude %{_libdir}/*.so + # libgfapi files +@@ -1263,19 +1249,6 @@ exit 0 + %{_bindir}/fusermount-glusterfs + %endif + +-%files thin-arbiter +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features +- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so +-%dir %{_datadir}/glusterfs/scripts +- %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh +-%config %{_sysconfdir}/glusterfs/thin-arbiter.vol +- +-%if ( 0%{?_with_systemd:1} ) +-%{_unitdir}/gluster-ta-volume.service +-%endif +- +- + %if ( 0%{!?_without_georeplication:1} ) + %files geo-replication + %config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-georep +diff --git a/tests/basic/afr/ta-check-locks.t b/tests/basic/afr/ta-check-locks.t +index c0102c3..c51aa39 100644 +--- a/tests/basic/afr/ta-check-locks.t ++++ b/tests/basic/afr/ta-check-locks.t +@@ -66,3 +66,5 @@ TEST ta_start_brick_process brick0 + EXPECT_WITHIN $HEAL_TIMEOUT "0" get_lock_count_on_ta + + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t +index f2b3c38..1b36dba 100644 +--- a/tests/basic/afr/ta-read.t ++++ b/tests/basic/afr/ta-read.t +@@ -58,3 +58,5 @@ TEST [ -z $TA_PID ] + # Read must now succeed. 
+ TEST cat $M0/FILE + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t +index bb2e58b..4b1ea85 100644 +--- a/tests/basic/afr/ta-shd.t ++++ b/tests/basic/afr/ta-shd.t +@@ -47,3 +47,5 @@ TEST ta_start_mount_process $M0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0 + EXPECT "Hello" cat $M0/a.txt + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t +index 18cb65b..77cbf5f 100644 +--- a/tests/basic/afr/ta-write-on-bad-brick.t ++++ b/tests/basic/afr/ta-write-on-bad-brick.t +@@ -49,3 +49,5 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate + TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5 + + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am +index 545c02b..537c148 100644 +--- a/xlators/features/Makefile.am ++++ b/xlators/features/Makefile.am +@@ -4,7 +4,7 @@ endif + + SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ + compress changelog gfid-access snapview-client snapview-server trash \ +- shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ ++ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) \ + utime changetimerecorder + + CLEANFILES = +-- +1.8.3.1 + diff --git a/SOURCES/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch b/SOURCES/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch new file mode 100644 index 0000000..328116b --- /dev/null +++ b/SOURCES/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch @@ -0,0 +1,31 @@ +From 24c2430e3cd629665851fdb2921d754e3ecef3b4 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Wed, 24 Apr 2019 07:47:32 +0530 +Subject: [PATCH 118/124] tests: mark thin-arbiter test ta.t as bad + +As thin-arbiter isn't packaged and tested at RHGS 3.5 + +Label: DOWNSTREAM ONLY + +BUG: 1698436 +Change-Id: Ideab570f58fc0b2daecb2be4ed4b6740362d0b35 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/168613 +Tested-by: RHGS Build Bot +--- + tests/basic/afr/ta.t | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tests/basic/afr/ta.t b/tests/basic/afr/ta.t +index 05d4843..cbd1b56 100644 +--- a/tests/basic/afr/ta.t ++++ b/tests/basic/afr/ta.t +@@ -52,3 +52,5 @@ TEST ! ls $B0/brick0/c.txt + TEST ! ls $B0/brick1/c.txt + + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +-- +1.8.3.1 + diff --git a/SOURCES/0119-glusterd-provide-a-way-to-detach-failed-node.patch b/SOURCES/0119-glusterd-provide-a-way-to-detach-failed-node.patch new file mode 100644 index 0000000..fa9198b --- /dev/null +++ b/SOURCES/0119-glusterd-provide-a-way-to-detach-failed-node.patch @@ -0,0 +1,53 @@ +From a325e7b3bbe5c1f67b999f375b83d2e2f1b2c1c6 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 9 Apr 2019 13:56:24 +0530 +Subject: [PATCH 119/124] glusterd: provide a way to detach failed node + +When a gluster node in trusted storage pool has failed +due to hardware issues, volume delete operation fails +saying "Not all peers are up" and peer detach for failed +node fails saying "Brick(s) with peer exists +in cluster". 
+ +The idea here is to use either replace-brick or remove-brick +command to remove all the bricks hosted by failed node and +then re-attempting the peer detach. This change adds this +trick in peer detach error message. + +> upstream patch : https://review.gluster.org/22534 + +>fixes: bz#1697866 +>Change-Id: I0c58887479d31db603ad8d6535ea9d547880ccc8 +>Signed-off-by: Sanju Rakonde + +BUG: 1696334 +Change-Id: I0c58887479d31db603ad8d6535ea9d547880ccc8 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/168614 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 6147995..af8a8a4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -4134,8 +4134,11 @@ set_deprobe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr, + + case GF_DEPROBE_BRICK_EXIST: + snprintf(errstr, len, +- "Brick(s) with the peer " +- "%s exist in cluster", ++ "Peer %s hosts one or more bricks. If the peer is in " ++ "not recoverable state then use either replace-brick " ++ "or remove-brick command with force to remove all " ++ "bricks from the peer and attempt the peer detach " ++ "again.", + hostname); + break; + +-- +1.8.3.1 + diff --git a/SOURCES/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch b/SOURCES/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch new file mode 100644 index 0000000..58b86d7 --- /dev/null +++ b/SOURCES/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch @@ -0,0 +1,62 @@ +From c429d3c63601e6ea15af76aa684c30bbeb746467 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Tue, 23 Apr 2019 13:03:28 +0530 +Subject: [PATCH 120/124] glusterd/shd: Keep a ref on volinfo until attach rpc + execute cbk + +When svc attach execute for multiplexing a daemon, we have to keep +a ref on volinfo until it finish the execution. Because, if the attach +is an aysnc call, then a parallel volume delete can lead to free the +volinfo + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22606/ + +>Change-Id: Ibc02b89557baaed2f63db63d7fb1a7480444ae0d +>fixes: bz#1702185 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: Ibc02b89557baaed2f63db63d7fb1a7480444ae0d +BUG: 1702240 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/168616 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 +++ + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 19eca9f..a9eab42 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -452,8 +452,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + } + + if (shd->attached) { ++ glusterd_volinfo_ref(volinfo); ++ /* Unref will happen from glusterd_svc_attach_cbk */ + ret = glusterd_attach_svc(svc, volinfo, flags); + if (ret) { ++ glusterd_volinfo_unref(volinfo); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to attach shd svc(volume=%s) to pid=%d. 
Starting" + "a new process", +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index 02945b1..f7be394 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -695,6 +695,10 @@ out: + if (flag) { + GF_FREE(flag); + } ++ ++ if (volinfo) ++ glusterd_volinfo_unref(volinfo); ++ + GF_ATOMIC_DEC(conf->blockers); + STACK_DESTROY(frame->root); + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch b/SOURCES/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch new file mode 100644 index 0000000..00aa910 --- /dev/null +++ b/SOURCES/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch @@ -0,0 +1,42 @@ +From e4209dfb27faeca5544a09474ac524546e5d11e0 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham +Date: Wed, 24 Apr 2019 18:14:33 +0530 +Subject: [PATCH 121/124] spec: glusterfs-devel for client-build should not + depend on server + +Found that libgfdb.pc was included in client package. +It was earlier removed from glusterfs-devel client package +as a part of: +40eb62a8872ce061416e899fb6c0784b6253ab16 + +Made it back into downstream when tier was introduced again. +Removing it in this patch. + +label: DOWNSTREAM ONLY + +Change-Id: I5fd5f5b0a6b06c677f8ea3693eb0392af51abaf1 +Signed-off-by: Hari Gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/168670 +Tested-by: RHGS Build Bot +Reviewed-by: Milind Changire +Reviewed-by: Atin Mukherjee +--- + glusterfs.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index bf72a55..d20b062 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1210,7 +1210,7 @@ exit 0 + %exclude %{_libdir}/pkgconfig/libgfchangelog.pc + %exclude %{_libdir}/libgfchangelog.so + %if ( 0%{!?_without_tiering:1} ) +-%{_libdir}/pkgconfig/libgfdb.pc ++%exclude %{_libdir}/pkgconfig/libgfdb.pc + %endif + %else + %{_libdir}/pkgconfig/libgfchangelog.pc +-- +1.8.3.1 + diff --git a/SOURCES/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch b/SOURCES/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch new file mode 100644 index 0000000..5d256e2 --- /dev/null +++ b/SOURCES/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch @@ -0,0 +1,312 @@ +From 2f07d12f902e371d8cb8c76007d558e3a727b56a Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 9 Apr 2019 18:23:05 +0530 +Subject: [PATCH 122/124] posix/ctime: Fix stat(time attributes) inconsistency + during readdirp + +Problem: + Creation of tar file on gluster volume throws warning +'file changed as we read it' + +Cause: + During readdirp, for few of the files whose inode is not +present, time attributes were served from backend. This caused +the ctime of few files to be different between before readdir +and after readdir by tar. + +Solution: + If ctime feature is enabled and inode is not present, don't +serve the time attributes from backend file, serve it from xattr. 
+ +Backport of: + > Patch: https://review.gluster.org/22540 + > fixes: bz#1698078 + > Change-Id: I427ef865f97399475faf5aa6ca495f7e317603ae + > Signed-off-by: Kotresh HR + +BUG: 1699709 +Change-Id: I427ef865f97399475faf5aa6ca495f7e317603ae +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/168687 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/basic/ctime/ctime-readdir.c | 29 +++++++++++++++++ + tests/basic/ctime/ctime-readdir.t | 50 ++++++++++++++++++++++++++++++ + xlators/storage/posix/src/posix-helpers.c | 29 +++++++++++------ + xlators/storage/posix/src/posix-metadata.c | 41 ++++++++++++++---------- + 4 files changed, 123 insertions(+), 26 deletions(-) + create mode 100644 tests/basic/ctime/ctime-readdir.c + create mode 100644 tests/basic/ctime/ctime-readdir.t + +diff --git a/tests/basic/ctime/ctime-readdir.c b/tests/basic/ctime/ctime-readdir.c +new file mode 100644 +index 0000000..8760db2 +--- /dev/null ++++ b/tests/basic/ctime/ctime-readdir.c +@@ -0,0 +1,29 @@ ++#include ++#include ++#include ++#include ++ ++int ++main(int argc, char **argv) ++{ ++ DIR *dir = NULL; ++ struct dirent *entry = NULL; ++ int ret = 0; ++ char *path = NULL; ++ ++ assert(argc == 2); ++ path = argv[1]; ++ ++ dir = opendir(path); ++ if (!dir) { ++ printf("opendir(%s) failed.\n", path); ++ return -1; ++ } ++ ++ while ((entry = readdir(dir)) != NULL) { ++ } ++ if (dir) ++ closedir(dir); ++ ++ return ret; ++} +diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t +new file mode 100644 +index 0000000..4564fc1 +--- /dev/null ++++ b/tests/basic/ctime/ctime-readdir.t +@@ -0,0 +1,50 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3}; ++TEST $CLI volume set $V0 performance.stat-prefetch on ++TEST $CLI volume set $V0 performance.readdir-ahead off ++TEST $CLI volume start $V0; ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++TEST mkdir $M0/dir0 ++TEST "echo hello_world > $M0/dir0/FILE" ++ ++ctime1=$(stat -c %Z $M0/dir0/FILE) ++echo "Mount change time: $ctime1" ++ ++sleep 2 ++ ++#Write to back end directly to modify ctime of backend file ++TEST "echo write_from_backend >> $B0/brick1/dir0/FILE" ++TEST "echo write_from_backend >> $B0/brick2/dir0/FILE" ++TEST "echo write_from_backend >> $B0/brick3/dir0/FILE" ++echo "Backend change time" ++echo "brick1: $(stat -c %Z $B0/brick1/dir0/FILE)" ++echo "brick2: $(stat -c %Z $B0/brick2/dir0/FILE)" ++echo "brick3: $(stat -c %Z $B0/brick3/dir0/FILE)" ++ ++#Stop and start to hit the case of no inode for readdir ++TEST umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++TEST build_tester $(dirname $0)/ctime-readdir.c ++ ++#Do readdir ++TEST ./$(dirname $0)/ctime-readdir $M0/dir0 ++ ++EXPECT "$ctime1" stat -c %Z $M0/dir0/FILE ++echo "Mount change time after readdir $(stat -c %Z $M0/dir0/FILE)" ++ ++cleanup_tester $(dirname $0)/ctime-readdir ++ ++cleanup; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 193afc5..37e33a9 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -832,17 +832,26 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path, + + iatt_from_stat(&stbuf, &lstatbuf); + +- if (inode && 
priv->ctime) { +- if (!inode_locked) { +- ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); ++ if (priv->ctime) { ++ if (inode) { ++ if (!inode_locked) { ++ ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); ++ } else { ++ ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, ++ "posix get mdata failed on gfid: %s", ++ uuid_utoa(inode->gfid)); ++ goto out; ++ } + } else { +- ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); +- } +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, +- "posix get mdata failed on gfid: %s", +- uuid_utoa(inode->gfid)); +- goto out; ++ ret = __posix_get_mdata_xattr(this, path, -1, NULL, &stbuf); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, ++ "posix get mdata failed on path: %s", path); ++ goto out; ++ } + } + } + +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 0ea9099..7ff5225 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -79,6 +79,7 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, + fd_based_fop = _gf_true; + } + if (!(fd_based_fop || real_path_arg)) { ++ GF_VALIDATE_OR_GOTO(this->name, inode, out); + MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL); + if (!real_path) { + uuid_utoa_r(inode->gfid, gfid_str); +@@ -114,14 +115,14 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, + key, + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), +- uuid_utoa(inode->gfid)); ++ inode ? uuid_utoa(inode->gfid) : "null"); + } else { + gf_msg(this->name, GF_LOG_DEBUG, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed" + " on %s gfid: %s key: %s ", + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), +- uuid_utoa(inode->gfid), key); ++ inode ? uuid_utoa(inode->gfid) : "null", key); + } + op_ret = -1; + goto out; +@@ -148,7 +149,7 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, + "getxattr failed on " + " on %s gfid: %s key: %s ", + real_path ? real_path : (real_path_arg ? real_path_arg : "null"), +- uuid_utoa(inode->gfid), key); ++ inode ? uuid_utoa(inode->gfid) : "null", key); + goto out; + } + +@@ -233,9 +234,14 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + int ret = -1; + int op_errno = 0; + +- GF_VALIDATE_OR_GOTO(this->name, inode, out); ++ /* Handle readdirp: inode might be null, time attributes should be served ++ * from xattr not from backend's file attributes */ ++ if (inode) { ++ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); ++ } else { ++ ret = -1; ++ } + +- ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); + if (ret == -1 || !mdata) { + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { +@@ -251,7 +257,9 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + * is hit when in-memory status is lost due to brick + * down scenario + */ +- __inode_ctx_set1(inode, this, (uint64_t *)&mdata); ++ if (inode) { ++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata); ++ } + } else { + /* Failed to get mdata from disk, xattr missing. + * This happens on two cases. 
+@@ -278,7 +286,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + */ + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ", +- real_path ? real_path : "null", uuid_utoa(inode->gfid), ++ real_path ? real_path : "null", ++ inode ? uuid_utoa(inode->gfid) : "null", + GF_XATTR_MDATA_KEY); + GF_FREE(mdata); + ret = 0; +@@ -297,6 +306,10 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + stbuf->ia_atime = mdata->atime.tv_sec; + stbuf->ia_atime_nsec = mdata->atime.tv_nsec; + } ++ /* Not set in inode context, hence free mdata */ ++ if (!inode) { ++ GF_FREE(mdata); ++ } + + out: + return ret; +@@ -416,6 +429,11 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + } + } + ++ if ((flag->ctime == 0) && (flag->mtime == 0) && (flag->atime == 0)) { ++ ret = 0; ++ goto unlock; ++ } ++ + /* Earlier, mdata was updated only if the existing time is less + * than the time to be updated. This would fail the scenarios + * where mtime can be set to any time using the syscall. Hence +@@ -486,7 +504,6 @@ out: + stbuf->ia_atime_nsec = mdata->atime.tv_nsec; + } + +- + return ret; + } + +@@ -604,10 +621,6 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); +- if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { +- goto out; +- } +- + if (frame->root->ctime.tv_sec == 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed, No ctime : %s gfid:%s", real_path, +@@ -643,9 +656,6 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + + if (inode && priv->ctime) { + (void)posix_get_parent_mdata_flag(frame->root->flags, &flag); +- if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { +- goto out; +- } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, stbuf, &flag, + _gf_false); +@@ -655,7 +665,6 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + uuid_utoa(inode->gfid)); + } + } +-out: + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0123-ctime-Fix-log-repeated-logging-during-open.patch b/SOURCES/0123-ctime-Fix-log-repeated-logging-during-open.patch new file mode 100644 index 0000000..b51c436 --- /dev/null +++ b/SOURCES/0123-ctime-Fix-log-repeated-logging-during-open.patch @@ -0,0 +1,79 @@ +From 03c0395a1ead769167046713a99662bc5c5233fa Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Fri, 19 Apr 2019 11:34:37 +0530 +Subject: [PATCH 123/124] ctime: Fix log repeated logging during open + +The log "posix set mdata failed, No ctime" logged repeatedly +after the fix [1]. Those could be internal fops. This patch +fixes the same. 
+ +[1] https://review.gluster.org/22540 + +Backport of: + > Patch: https://review.gluster.org/22591 + > fixes: bz#1701457 + > Change-Id: I42799a90b976982cedb0ca11fa224d555eb05650 + > Signed-off-by: Kotresh HR + +BUG: 1699709 +Change-Id: I42799a90b976982cedb0ca11fa224d555eb05650 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/168688 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/storage/posix/src/posix-metadata.c | 15 +++++---------- + 1 file changed, 5 insertions(+), 10 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 7ff5225..e96f222 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -429,11 +429,6 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + } + } + +- if ((flag->ctime == 0) && (flag->mtime == 0) && (flag->atime == 0)) { +- ret = 0; +- goto unlock; +- } +- + /* Earlier, mdata was updated only if the existing time is less + * than the time to be updated. This would fail the scenarios + * where mtime can be set to any time using the syscall. Hence +@@ -621,13 +616,9 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); +- if (frame->root->ctime.tv_sec == 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, +- "posix set mdata failed, No ctime : %s gfid:%s", real_path, +- inode ? uuid_utoa(inode->gfid) : "No inode"); ++ if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { + goto out; + } +- + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, stbuf, &flag, + _gf_false); +@@ -656,6 +647,9 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + + if (inode && priv->ctime) { + (void)posix_get_parent_mdata_flag(frame->root->flags, &flag); ++ if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { ++ goto out; ++ } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, stbuf, &flag, + _gf_false); +@@ -665,6 +659,7 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + uuid_utoa(inode->gfid)); + } + } ++out: + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0124-spec-remove-duplicate-references-to-files.patch b/SOURCES/0124-spec-remove-duplicate-references-to-files.patch new file mode 100644 index 0000000..b8a8c8b --- /dev/null +++ b/SOURCES/0124-spec-remove-duplicate-references-to-files.patch @@ -0,0 +1,39 @@ +From e7112224eebaa91c529397a944e94254e482f48f Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Thu, 25 Apr 2019 13:07:19 +0530 +Subject: [PATCH 124/124] spec: remove duplicate references to files + +Label: DOWNSTREAM ONLY + +Change-Id: I446fbeadaaab96aa215f4fd784d951f825486008 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/168735 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + glusterfs.spec.in | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index d20b062..86a1527 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1214,7 +1214,6 @@ exit 0 + %endif + %else + %{_libdir}/pkgconfig/libgfchangelog.pc +-%{_libdir}/libgfchangelog.so + %if ( 0%{!?_without_tiering:1} ) + %{_libdir}/pkgconfig/libgfdb.pc + %endif +@@ -1469,7 +1468,6 @@ exit 0 + %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol + %ghost %dir 
%attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run + %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid +-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub +-- +1.8.3.1 + diff --git a/SOURCES/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch b/SOURCES/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch new file mode 100644 index 0000000..c1c49a3 --- /dev/null +++ b/SOURCES/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch @@ -0,0 +1,75 @@ +From 0cd08d9e89f5ee86d5f4f90f0ca5c07bd290636c Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Fri, 26 Apr 2019 22:28:53 +0530 +Subject: [PATCH 125/141] glusterd: define dumpops in the xlator_api of + glusterd + +Problem: statedump is not capturing information related to glusterd + +Solution: statdump is not capturing glusterd info because +trav->dumpops is null in gf_proc_dump_single_xlator_info () +where trav is glusterd xlator object. trav->dumpops is null +because we missed to define dumpops in xlator_api of glusterd. +defining dumpops in xlator_api of glusterd fixes the issue. + +> fixes: bz#1703629 +> Change-Id: If85429ecb1ef580aced8d5b88d09fc15258bfc4c +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22640/ + +BUG: 1703753 +Change-Id: If85429ecb1ef580aced8d5b88d09fc15258bfc4c +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/169207 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/bugs/glusterd/optimized-basic-testcases.t | 13 +++++++++++++ + xlators/mgmt/glusterd/src/glusterd.c | 1 + + 2 files changed, 14 insertions(+) + +diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t +index dd98a65..d700b5e 100644 +--- a/tests/bugs/glusterd/optimized-basic-testcases.t ++++ b/tests/bugs/glusterd/optimized-basic-testcases.t +@@ -32,6 +32,16 @@ function get_brick_host_uuid() + echo $host_uuid_list | awk '{print $1}' + } + ++function generate_statedump_and_check_for_glusterd_info { ++ pid=`pidof glusterd` ++ #remove old stale statedumps ++ cleanup_statedump $pid ++ kill -USR1 $pid ++ #Wait till the statedump is generated ++ sleep 1 ++ fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.") ++ cat $statedumpdir/$fname | grep "xlator.glusterd.priv" | wc -l ++} + + cleanup; + +@@ -279,4 +289,7 @@ mkdir -p /xyz/var/lib/glusterd/abc + TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc + EXPECT 'Created' volinfo_field "test" 'Status'; + ++EXPECT "1" generate_statedump_and_check_for_glusterd_info ++ ++cleanup_statedump `pidof glusterd` + cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index d4ab630..c0973cb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -2231,6 +2231,7 @@ xlator_api_t xlator_api = { + .fini = fini, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ ++ .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, +-- +1.8.3.1 + diff --git a/SOURCES/0126-cluster-dht-refactor-dht-lookup-functions.patch b/SOURCES/0126-cluster-dht-refactor-dht-lookup-functions.patch new file mode 100644 index 0000000..25c43a0 --- /dev/null +++ 
b/SOURCES/0126-cluster-dht-refactor-dht-lookup-functions.patch @@ -0,0 +1,663 @@ +From 6565749c95e90f360a994bde1416cffd22cd8ce9 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Mon, 25 Mar 2019 15:56:56 +0530 +Subject: [PATCH 126/141] cluster/dht: refactor dht lookup functions + +Part 1: refactor the dht_lookup_dir_cbk +and dht_selfheal_directory functions. +Added a simple dht selfheal directory test + +upstream: https://review.gluster.org/#/c/glusterfs/+/22407/ +> Change-Id: I1410c26359e3c14b396adbe751937a52bd2fcff9 +> updates: bz#1590385 + +Change-Id: Idd0a7df7122d634c371ecf30c0dbb94dc6063416 +BUG: 1703897 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/169037 +Tested-by: RHGS Build Bot +Reviewed-by: Susant Palai +Reviewed-by: Atin Mukherjee +--- + tests/basic/distribute/dir-heal.t | 145 +++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 178 +++++++++++++++------------------ + xlators/cluster/dht/src/dht-selfheal.c | 65 +++++++----- + 3 files changed, 264 insertions(+), 124 deletions(-) + create mode 100644 tests/basic/distribute/dir-heal.t + +diff --git a/tests/basic/distribute/dir-heal.t b/tests/basic/distribute/dir-heal.t +new file mode 100644 +index 0000000..851f765 +--- /dev/null ++++ b/tests/basic/distribute/dir-heal.t +@@ -0,0 +1,145 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../nfs.rc ++. $(dirname $0)/../../common-utils.rc ++ ++# Test 1 overview: ++# ---------------- ++# ++# 1. Kill one brick of the volume. ++# 2. Create directories and change directory properties. ++# 3. Bring up the brick and access the directory ++# 4. Check the permissions and xattrs on the backend ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3} ++TEST $CLI volume start $V0 ++ ++# We want the lookup to reach DHT ++TEST $CLI volume set $V0 performance.stat-prefetch off ++ ++# Mount using FUSE , kill a brick and create directories ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++ ++ls $M0/ ++cd $M0 ++ ++TEST kill_brick $V0 $H0 $B0/$V0-1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1 ++ ++TEST mkdir dir{1..4} ++ ++# No change for dir1 ++# Change permissions for dir2 ++# Set xattr on dir3 ++# Change permissions and set xattr on dir4 ++ ++TEST chmod 777 $M0/dir2 ++ ++TEST setfattr -n "user.test" -v "test" $M0/dir3 ++ ++TEST chmod 777 $M0/dir4 ++TEST setfattr -n "user.test" -v "test" $M0/dir4 ++ ++ ++# Start all bricks ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1 ++ ++#$CLI volume status ++ ++# It takes a while for the client to reconnect to the brick ++sleep 5 ++ ++stat $M0/dir* > /dev/null ++ ++# Check that directories have been created on the brick that was killed ++ ++TEST ls $B0/$V0-1/dir1 ++ ++TEST ls $B0/$V0-1/dir2 ++EXPECT "777" stat -c "%a" $B0/$V0-1/dir2 ++ ++TEST ls $B0/$V0-1/dir3 ++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3 ++ ++ ++TEST ls $B0/$V0-1/dir4 ++EXPECT "777" stat -c "%a" $B0/$V0-1/dir4 ++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4 ++ ++ ++TEST rm -rf $M0/* ++ ++cd ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++ ++# Test 2 overview: ++# ---------------- ++# 1. Create directories with all bricks up. ++# 2. Kill a brick and change directory properties and set user xattr. ++# 2. 
Bring up the brick and access the directory ++# 3. Check the permissions and xattrs on the backend ++ ++ ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++ ++ls $M0/ ++cd $M0 ++TEST mkdir dir{1..4} ++ ++TEST kill_brick $V0 $H0 $B0/$V0-1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1 ++ ++# No change for dir1 ++# Change permissions for dir2 ++# Set xattr on dir3 ++# Change permissions and set xattr on dir4 ++ ++TEST chmod 777 $M0/dir2 ++ ++TEST setfattr -n "user.test" -v "test" $M0/dir3 ++ ++TEST chmod 777 $M0/dir4 ++TEST setfattr -n "user.test" -v "test" $M0/dir4 ++ ++ ++# Start all bricks ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1 ++ ++#$CLI volume status ++ ++# It takes a while for the client to reconnect to the brick ++sleep 5 ++ ++stat $M0/dir* > /dev/null ++ ++# Check directories on the brick that was killed ++ ++TEST ls $B0/$V0-1/dir2 ++EXPECT "777" stat -c "%a" $B0/$V0-1/dir2 ++ ++TEST ls $B0/$V0-1/dir3 ++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3 ++ ++ ++TEST ls $B0/$V0-1/dir4 ++EXPECT "777" stat -c "%a" $B0/$V0-1/dir4 ++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4 ++cd ++ ++ ++# Cleanup ++cleanup ++ +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 2a68193..d3e900c 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -801,9 +801,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, + call_frame_t *xattr_frame = NULL; + gf_boolean_t vol_down = _gf_false; + +- this = frame->this; +- + GF_VALIDATE_OR_GOTO("dht", frame, out); ++ this = frame->this; + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame->local, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); +@@ -812,6 +811,7 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, + conf = this->private; + layout = local->selfheal.layout; + local->mds_heal_fresh_lookup = mark_during_fresh_lookup; ++ + gf_uuid_unparse(local->gfid, gfid_local); + + /* Code to update hashed subvol consider as a mds subvol +@@ -1240,6 +1240,31 @@ out: + } + + int ++dht_needs_selfheal(call_frame_t *frame, xlator_t *this) ++{ ++ dht_local_t *local = NULL; ++ dht_layout_t *layout = NULL; ++ int needs_selfheal = 0; ++ int ret = 0; ++ ++ local = frame->local; ++ layout = local->layout; ++ ++ if (local->need_attrheal || local->need_xattr_heal || ++ local->need_selfheal) { ++ needs_selfheal = 1; ++ } ++ ++ ret = dht_layout_normalize(this, &local->loc, layout); ++ ++ if (ret != 0) { ++ gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path); ++ needs_selfheal = 1; ++ } ++ return needs_selfheal; ++} ++ ++int + dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *postparent) +@@ -1256,8 +1281,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char gfid_node[GF_UUID_BUF_SIZE] = {0}; + int32_t mds_xattr_val[1] = {0}; +- call_frame_t *copy = NULL; +- dht_local_t *copy_local = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, out); +@@ -1270,7 +1293,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + conf = this->private; + + layout = local->layout; ++ 
gf_msg_debug(this->name, op_errno, ++ "%s: lookup on %s returned with op_ret = %d, op_errno = %d", ++ local->loc.path, prev->name, op_ret, op_errno); + ++ /* The first successful lookup*/ + if (!op_ret && gf_uuid_is_null(local->gfid)) { + memcpy(local->gfid, stbuf->ia_gfid, 16); + } +@@ -1298,13 +1325,10 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + if (op_ret == -1) { + local->op_errno = op_errno; +- gf_msg_debug(this->name, op_errno, +- "%s: lookup on %s returned error", local->loc.path, +- prev->name); + + /* The GFID is missing on this subvol. Force a heal. */ + if (op_errno == ENODATA) { +- local->need_selfheal = 1; ++ local->need_lookup_everywhere = 1; + } + goto unlock; + } +@@ -1312,12 +1336,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + is_dir = check_is_dir(inode, stbuf, xattr); + if (!is_dir) { + gf_msg_debug(this->name, 0, +- "lookup of %s on %s returned non" +- "dir 0%o" ++ "%s: lookup on %s returned non dir 0%o" + "calling lookup_everywhere", + local->loc.path, prev->name, stbuf->ia_type); + +- local->need_selfheal = 1; ++ local->need_lookup_everywhere = 1; + goto unlock; + } + +@@ -1328,14 +1351,8 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + dht_aggregate_xattr(local->xattr, xattr); + } + +- if (dict_get(xattr, conf->mds_xattr_key)) { +- local->mds_subvol = prev; +- local->mds_stbuf.ia_gid = stbuf->ia_gid; +- local->mds_stbuf.ia_uid = stbuf->ia_uid; +- local->mds_stbuf.ia_prot = stbuf->ia_prot; +- } +- + if (local->stbuf.ia_type != IA_INVAL) { ++ /* This is not the first subvol to respond */ + if (!__is_root_gfid(stbuf->ia_gfid) && + ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid) || +@@ -1348,65 +1365,64 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + if (local->inode == NULL) + local->inode = inode_ref(inode); + ++ /* This could be a problem */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->postparent, postparent); + + if (!dict_get(xattr, conf->mds_xattr_key)) { + gf_msg_debug(this->name, 0, +- "Internal xattr %s is not present " +- " on path %s gfid is %s ", +- conf->mds_xattr_key, local->loc.path, gfid_local); ++ "%s: mds xattr %s is not present " ++ "on %s(gfid = %s)", ++ local->loc.path, conf->mds_xattr_key, prev->name, ++ gfid_local); + goto unlock; +- } else { +- /* Save mds subvol on inode ctx */ +- ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, +- DHT_MSG_SET_INODE_CTX_FAILED, +- "Failed to set hashed subvol for %s vol is %s", +- local->loc.path, prev->name); +- } ++ } ++ ++ local->mds_subvol = prev; ++ local->mds_stbuf = *stbuf; ++ ++ /* Save mds subvol on inode ctx */ ++ ++ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, ++ "%s: Failed to set mds (%s)", local->loc.path, prev->name); + } + check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, + mds_xattr_val, 1, &errst); + if ((check_mds < 0) && !errst) { + local->mds_xattr = dict_ref(xattr); + gf_msg_debug(this->name, 0, +- "Value of %s is not zero on hashed subvol " +- "so xattr needs to be heal on non hashed" +- " path is %s and vol name is %s " +- " gfid is %s", +- conf->mds_xattr_key, local->loc.path, prev->name, ++ "%s: %s is not zero on %s. Xattrs need to be healed." 
++ "(gfid = %s)", ++ local->loc.path, conf->mds_xattr_key, prev->name, + gfid_local); + local->need_xattr_heal = 1; +- local->mds_subvol = prev; + } + } ++ + unlock: + UNLOCK(&frame->lock); + + this_call_cnt = dht_frame_return(frame); + + if (is_last_call(this_call_cnt)) { ++ /* If the mds subvol is not set correctly*/ ++ if (!__is_root_gfid(local->gfid) && ++ (!dict_get(local->xattr, conf->mds_xattr_key))) { ++ local->need_selfheal = 1; ++ } ++ + /* No need to call xattr heal code if volume count is 1 + */ +- if (conf->subvolume_cnt == 1) ++ if (conf->subvolume_cnt == 1) { + local->need_xattr_heal = 0; +- +- /* Code to update all extended attributed from hashed subvol +- to local->xattr +- */ +- if (local->need_xattr_heal && (local->mds_xattr)) { +- dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr, +- NULL, NULL); +- dict_unref(local->mds_xattr); +- local->mds_xattr = NULL; + } + +- if (local->need_selfheal) { +- local->need_selfheal = 0; ++ if (local->need_selfheal || local->need_lookup_everywhere) { + /* Set the gfid-req so posix will set the GFID*/ + if (!gf_uuid_is_null(local->gfid)) { ++ /* Ok, this should _never_ happen */ + ret = dict_set_static_bin(local->xattr_req, "gfid-req", + local->gfid, 16); + } else { +@@ -1414,73 +1430,36 @@ unlock: + ret = dict_set_static_bin(local->xattr_req, "gfid-req", + local->gfid_req, 16); + } ++ } ++ ++ if (local->need_lookup_everywhere) { ++ local->need_lookup_everywhere = 0; + dht_lookup_everywhere(frame, this, &local->loc); + return 0; + } + + if (local->op_ret == 0) { +- ret = dht_layout_normalize(this, &local->loc, layout); +- +- if (ret != 0) { +- gf_msg_debug(this->name, 0, "fixing assignment on %s", +- local->loc.path); ++ if (dht_needs_selfheal(frame, this)) { + goto selfheal; + } + + dht_layout_set(this, local->inode, layout); +- if (!dict_get(local->xattr, conf->mds_xattr_key) || +- local->need_xattr_heal) +- goto selfheal; +- } +- +- if (local->inode) { +- dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); +- } +- +- if (local->loc.parent) { +- dht_inode_ctx_time_update(local->loc.parent, this, +- &local->postparent, 1); +- } +- +- if (local->need_attrheal) { +- local->need_attrheal = 0; +- if (!__is_root_gfid(inode->gfid)) { +- local->stbuf.ia_gid = local->mds_stbuf.ia_gid; +- local->stbuf.ia_uid = local->mds_stbuf.ia_uid; +- local->stbuf.ia_prot = local->mds_stbuf.ia_prot; ++ if (local->inode) { ++ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); + } +- copy = create_frame(this, this->ctx->pool); +- if (copy) { +- copy_local = dht_local_init(copy, &local->loc, NULL, 0); +- if (!copy_local) { +- DHT_STACK_DESTROY(copy); +- goto skip_attr_heal; +- } +- copy_local->stbuf = local->stbuf; +- gf_uuid_copy(copy_local->loc.gfid, local->stbuf.ia_gfid); +- copy_local->mds_stbuf = local->mds_stbuf; +- copy_local->mds_subvol = local->mds_subvol; +- copy->local = copy_local; +- FRAME_SU_DO(copy, dht_local_t); +- ret = synctask_new(this->ctx->env, dht_dir_attr_heal, +- dht_dir_attr_heal_done, copy, copy); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, +- DHT_MSG_DIR_ATTR_HEAL_FAILED, +- "Synctask creation failed to heal attr " +- "for path %s gfid %s ", +- local->loc.path, local->gfid); +- DHT_STACK_DESTROY(copy); +- } ++ ++ if (local->loc.parent) { ++ dht_inode_ctx_time_update(local->loc.parent, this, ++ &local->postparent, 1); + } + } + +- skip_attr_heal: + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->postparent); + /* Delete mds xattr at the time of STACK 
UNWIND */ + if (local->xattr) + GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr); ++ + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); +@@ -5444,9 +5423,8 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, +- "Failed to get mds subvol for path %s" +- "gfid is %s ", +- loc->path, gfid_local); ++ "%s: Failed to get mds subvol. (gfid is %s)", loc->path, ++ gfid_local); + } + (*op_errno) = ENOENT; + goto err; +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index bd1b7ea..5420fca 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -1033,18 +1033,27 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf, + int missing_attr = 0; + int i = 0, ret = -1; + dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; + xlator_t *this = NULL; + int cnt = 0; + + local = frame->local; + this = frame->this; ++ conf = this->private; ++ ++ /* We need to heal the attrs if: ++ * 1. Any directories were missing - the newly created dirs will need ++ * to have the correct attrs set ++ * 2. An existing dir does not have the correct permissions -they may ++ * have been changed when a brick was down. ++ */ + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == -1) + missing_attr++; + } + +- if (missing_attr == 0) { ++ if ((missing_attr == 0) && (local->need_attrheal == 0)) { + if (!local->heal_layout) { + gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ", + loc->path, uuid_utoa(loc->gfid)); +@@ -1062,19 +1071,12 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf, + return 0; + } + +- local->call_cnt = missing_attr; +- cnt = layout->cnt; ++ cnt = local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < cnt; i++) { +- if (layout->list[i].err == -1) { +- gf_msg_trace(this->name, 0, "%s: setattr on subvol %s, gfid = %s", +- loc->path, layout->list[i].xlator->name, +- uuid_utoa(loc->gfid)); +- +- STACK_WIND( +- frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator, +- layout->list[i].xlator->fops->setattr, loc, stbuf, valid, NULL); +- } ++ STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator, ++ layout->list[i].xlator->fops->setattr, loc, stbuf, valid, ++ NULL); + } + + return 0; +@@ -1492,6 +1494,9 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + } + + if (missing_dirs == 0) { ++ /* We don't need to create any directories. 
Proceed to heal the ++ * attrs and xattrs ++ */ + if (!__is_root_gfid(local->stbuf.ia_gfid)) { + if (local->need_xattr_heal) { + local->need_xattr_heal = 0; +@@ -1499,8 +1504,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + if (ret) + gf_msg(this->name, GF_LOG_ERROR, ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, +- "xattr heal failed for " +- "directory %s gfid %s ", ++ "%s:xattr heal failed for " ++ "directory (gfid = %s)", + local->loc.path, local->gfid); + } else { + if (!gf_uuid_is_null(local->gfid)) +@@ -1512,8 +1517,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + + gf_msg(this->name, GF_LOG_INFO, 0, + DHT_MSG_DIR_XATTR_HEAL_FAILED, +- "Failed to set mds xattr " +- "for directory %s gfid %s ", ++ "%s: Failed to set mds xattr " ++ "for directory (gfid = %s)", + local->loc.path, local->gfid); + } + } +@@ -2085,10 +2090,10 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + loc_t *loc, dht_layout_t *layout) + { + dht_local_t *local = NULL; ++ xlator_t *this = NULL; + uint32_t down = 0; + uint32_t misc = 0; + int ret = 0; +- xlator_t *this = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + char gfid[GF_UUID_BUF_SIZE] = {0}; + inode_t *linked_inode = NULL, *inode = NULL; +@@ -2099,6 +2104,11 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref(this, layout); + ++ if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) { ++ /*Use the one in the mds_stbuf*/ ++ local->stbuf = local->mds_stbuf; ++ } ++ + if (!__is_root_gfid(local->stbuf.ia_gfid)) { + gf_uuid_unparse(local->stbuf.ia_gfid, gfid); + gf_uuid_unparse(loc->parent->gfid, pgfid); +@@ -2118,6 +2128,13 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + inode_unref(inode); + } + ++ if (local->need_xattr_heal && (local->mds_xattr)) { ++ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr, ++ NULL, NULL); ++ dict_unref(local->mds_xattr); ++ local->mds_xattr = NULL; ++ } ++ + dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt, + &local->selfheal.overlaps_cnt, + &local->selfheal.missing_cnt, &local->selfheal.down, +@@ -2128,18 +2145,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + + if (down) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, +- "Directory selfheal failed: %d subvolumes down." +- "Not fixing. path = %s, gfid = %s", +- down, loc->path, gfid); ++ "%s: Directory selfheal failed: %d subvolumes down." ++ "Not fixing. gfid = %s", ++ loc->path, down, gfid); + ret = 0; + goto sorry_no_fix; + } + + if (misc) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, +- "Directory selfheal failed : %d subvolumes " +- "have unrecoverable errors. path = %s, gfid = %s", +- misc, loc->path, gfid); ++ "%s: Directory selfheal failed : %d subvolumes " ++ "have unrecoverable errors. 
gfid = %s", ++ loc->path, misc, gfid); + + ret = 0; + goto sorry_no_fix; +@@ -2369,13 +2386,13 @@ dht_dir_attr_heal(void *data) + + frame = data; + local = frame->local; +- mds_subvol = local->mds_subvol; + this = frame->this; + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", local, out); + conf = this->private; + GF_VALIDATE_OR_GOTO("dht", conf, out); + ++ mds_subvol = local->mds_subvol; + call_cnt = conf->subvolume_cnt; + + if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) { +-- +1.8.3.1 + diff --git a/SOURCES/0127-cluster-dht-Refactor-dht-lookup-functions.patch b/SOURCES/0127-cluster-dht-Refactor-dht-lookup-functions.patch new file mode 100644 index 0000000..0d0fdb3 --- /dev/null +++ b/SOURCES/0127-cluster-dht-Refactor-dht-lookup-functions.patch @@ -0,0 +1,200 @@ +From 884ba13ee47888b5de9b6d6acaf051e895f55053 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Wed, 10 Apr 2019 14:28:55 +0530 +Subject: [PATCH 127/141] cluster/dht: Refactor dht lookup functions + +Part 2: Modify dht_revalidate_cbk to call +dht_selfheal_directory instead of separate calls +to heal attrs and xattrs. + +upstream: https://review.gluster.org/#/c/glusterfs/+/22542/ + +> Change-Id: Id41ac6c4220c2c35484812bbfc6157fc3c86b142 +> updates: bz#1590385 + +Change-Id: Id53962306dd142efc741de838b585fa5c78f9b1f +BUG:1703897 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/169038 +Tested-by: RHGS Build Bot +Reviewed-by: Susant Palai +Reviewed-by: Atin Mukherjee +--- + xlators/cluster/dht/src/dht-common.c | 104 ++++++++++------------------------- + 1 file changed, 30 insertions(+), 74 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index d3e900c..183872f 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1365,7 +1365,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + if (local->inode == NULL) + local->inode = inode_ref(inode); + +- /* This could be a problem */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->postparent, postparent); + +@@ -1509,8 +1508,6 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int is_dir = 0; + int is_linkfile = 0; + int follow_link = 0; +- call_frame_t *copy = NULL; +- dht_local_t *copy_local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + uint32_t vol_commit_hash = 0; + xlator_t *subvol = NULL; +@@ -1538,17 +1535,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + gf_uuid_unparse(local->loc.gfid, gfid); + ++ gf_msg_debug(this->name, op_errno, ++ "%s: revalidate lookup on %s returned op_ret %d", ++ local->loc.path, prev->name, op_ret); ++ + LOCK(&frame->lock); + { + if (gf_uuid_is_null(local->gfid)) { + memcpy(local->gfid, local->loc.gfid, 16); + } + +- gf_msg_debug(this->name, op_errno, +- "revalidate lookup of %s " +- "returned with op_ret %d", +- local->loc.path, op_ret); +- + if (op_ret == -1) { + local->op_errno = op_errno; + +@@ -1580,6 +1576,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + local->loc.path); + + local->need_lookup_everywhere = 1; ++ } else if (IA_ISDIR(local->loc.inode->ia_type)) { ++ local->need_selfheal = 1; + } + } + +@@ -1638,15 +1636,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + (local->stbuf.ia_uid != stbuf->ia_uid) || + is_permission_different(&local->stbuf.ia_prot, + &stbuf->ia_prot)) { +- local->need_selfheal = 1; ++ 
local->need_attrheal = 1; + } + } + + if (!dict_get(xattr, conf->mds_xattr_key)) { + gf_msg_debug(this->name, 0, +- "internal xattr %s is not present" +- " on path %s gfid is %s ", +- conf->mds_xattr_key, local->loc.path, gfid); ++ "%s: internal xattr %s is not present" ++ " on subvol %s(gfid is %s)", ++ local->loc.path, conf->mds_xattr_key, prev->name, ++ gfid); + } else { + check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, + mds_xattr_val, 1, &errst); +@@ -1734,71 +1733,28 @@ unlock: + local->need_xattr_heal = 0; + + if (IA_ISDIR(local->stbuf.ia_type)) { +- /* Code to update all extended attributed from hashed +- subvol to local->xattr and call heal code to heal +- custom xattr from hashed subvol to non-hashed subvol +- */ +- if (local->need_xattr_heal && (local->mds_xattr)) { +- dht_dir_set_heal_xattr(this, local, local->xattr, +- local->mds_xattr, NULL, NULL); +- dict_unref(local->mds_xattr); +- local->mds_xattr = NULL; +- local->need_xattr_heal = 0; +- ret = dht_dir_xattr_heal(this, local); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, ret, +- DHT_MSG_DIR_XATTR_HEAL_FAILED, +- "xattr heal failed for directory %s " +- " gfid %s ", +- local->loc.path, gfid); +- } else { +- /* Call function to save hashed subvol on inode +- ctx if internal mds xattr is not present and +- all subvols are up +- */ +- if (inode && !__is_root_gfid(inode->gfid) && (!local->op_ret)) +- (void)dht_common_mark_mdsxattr(frame, NULL, 1); +- } +- } +- if (local->need_selfheal) { +- local->need_selfheal = 0; +- if (!__is_root_gfid(inode->gfid)) { +- gf_uuid_copy(local->gfid, local->mds_stbuf.ia_gfid); +- local->stbuf.ia_gid = local->mds_stbuf.ia_gid; +- local->stbuf.ia_uid = local->mds_stbuf.ia_uid; +- local->stbuf.ia_prot = local->mds_stbuf.ia_prot; +- } else { +- gf_uuid_copy(local->gfid, local->stbuf.ia_gfid); +- local->stbuf.ia_gid = local->prebuf.ia_gid; +- local->stbuf.ia_uid = local->prebuf.ia_uid; +- local->stbuf.ia_prot = local->prebuf.ia_prot; +- } ++ if (!__is_root_gfid(local->loc.inode->gfid) && ++ (!dict_get(local->xattr, conf->mds_xattr_key))) ++ local->need_selfheal = 1; + +- copy = create_frame(this, this->ctx->pool); +- if (copy) { +- copy_local = dht_local_init(copy, &local->loc, NULL, 0); +- if (!copy_local) { +- DHT_STACK_DESTROY(copy); +- goto cont; +- } +- copy_local->stbuf = local->stbuf; +- copy_local->mds_stbuf = local->mds_stbuf; +- copy_local->mds_subvol = local->mds_subvol; +- copy->local = copy_local; +- FRAME_SU_DO(copy, dht_local_t); +- ret = synctask_new(this->ctx->env, dht_dir_attr_heal, +- dht_dir_attr_heal_done, copy, copy); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, +- DHT_MSG_DIR_ATTR_HEAL_FAILED, +- "Synctask creation failed to heal attr " +- "for path %s gfid %s ", +- local->loc.path, local->gfid); +- DHT_STACK_DESTROY(copy); ++ if (dht_needs_selfheal(frame, this)) { ++ if (!__is_root_gfid(local->loc.inode->gfid)) { ++ local->stbuf.ia_gid = local->mds_stbuf.ia_gid; ++ local->stbuf.ia_uid = local->mds_stbuf.ia_uid; ++ local->stbuf.ia_prot = local->mds_stbuf.ia_prot; ++ } else { ++ local->stbuf.ia_gid = local->prebuf.ia_gid; ++ local->stbuf.ia_uid = local->prebuf.ia_uid; ++ local->stbuf.ia_prot = local->prebuf.ia_prot; + } ++ ++ layout = local->layout; ++ dht_selfheal_directory(frame, dht_lookup_selfheal_cbk, ++ &local->loc, layout); ++ return 0; + } + } +- cont: ++ + if (local->layout_mismatch) { + /* Found layout mismatch in the directory, need to + fix this in the inode context */ +@@ -1814,7 +1770,7 @@ unlock: + dht_layout_unref(this, local->layout); 
+ local->layout = NULL; + +- /* We know that current cached subvol is no more ++ /* We know that current cached subvol is no longer + valid, get the new one */ + local->cached_subvol = NULL; + if (local->xattr_req) { +-- +1.8.3.1 + diff --git a/SOURCES/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch b/SOURCES/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch new file mode 100644 index 0000000..862b828 --- /dev/null +++ b/SOURCES/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch @@ -0,0 +1,86 @@ +From bb39abc1dab3c7b7b725f9eefe119218e94f610b Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 29 Apr 2019 18:48:36 +0530 +Subject: [PATCH 128/141] glusterd: Fix bulkvoldict thread logic in brick + multiplexing + +Problem: Currently glusterd spawn bulkvoldict in brick_mux + environment while no. of volumes are less than configured + glusterd.vol_count_per_thread + +Solution: Correct the logic to spawn bulkvoldict thread + 1) Calculate endindex only while total thread is non zero + 2) Update end index correctly to pass index for bulkvoldict + thread + +> Fixes: bz#1704252 +> Change-Id: I1def847fbdd6a605e7687bfc4e42b706bf0eb70b +> (Cherry picked from commit ac70f66c5805e10b3a1072bd467918730c0aeeb4) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22647/) + +BUG: 1704769 +Change-Id: I1def847fbdd6a605e7687bfc4e42b706bf0eb70b +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/169091 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ff6102b..efa5a86 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3436,9 +3436,19 @@ glusterd_add_bulk_volumes_create_thread(void *data) + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; +- if ((count < start) || (count > end)) ++ ++ /* Skip volumes if index count is less than start ++ index to handle volume for specific thread ++ */ ++ if (count < start) + continue; + ++ /* No need to process volume if index count is greater ++ than end index ++ */ ++ if (count > end) ++ break; ++ + ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); + if (ret) + goto out; +@@ -3499,9 +3509,11 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + totthread = 0; + } else { + totthread = volcnt / vol_per_thread_limit; +- endindex = volcnt % vol_per_thread_limit; +- if (endindex) +- totthread++; ++ if (totthread) { ++ endindex = volcnt % vol_per_thread_limit; ++ if (endindex) ++ totthread++; ++ } + } + + if (totthread == 0) { +@@ -3527,10 +3539,10 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + arg->this = this; + arg->voldict = dict_arr[i]; + arg->start = start; +- if (!endindex) { ++ if ((i + 1) != totthread) { + arg->end = ((i + 1) * vol_per_thread_limit); + } else { +- arg->end = (start + endindex); ++ arg->end = ((i * vol_per_thread_limit) + endindex); + } + th_ret = gf_thread_create_detached( + &th_id, glusterd_add_bulk_volumes_create_thread, arg, +-- +1.8.3.1 + diff --git a/SOURCES/0129-core-handle-memory-accounting-correctly.patch b/SOURCES/0129-core-handle-memory-accounting-correctly.patch new file mode 100644 index 0000000..1281d04 --- /dev/null +++ b/SOURCES/0129-core-handle-memory-accounting-correctly.patch 
@@ -0,0 +1,401 @@ +From f305ee93ec9dbbd679e1eb58c7c0bf8d9b5659d5 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 12 Apr 2019 13:40:59 +0200 +Subject: [PATCH 129/141] core: handle memory accounting correctly + +When a translator stops, memory accounting for that translator is not +destroyed (because there could remain memory allocated that references +it), but mutexes that coordinate updates of memory accounting were +destroyed. This caused incorrect memory accounting and even crashes in +debug mode. + +This patch also fixes some other things: + +* Reduce the number of atomic operations needed to manage memory + accounting. +* Correctly account memory when realloc() is used. +* Merge two critical sections into one. +* Cleaned the code a bit. + +Upstream patch: +> Change-Id: Id5eaee7338729b9bc52c931815ca3ff1e5a7dcc8 +> Upstream patch link : https://review.gluster.org/#/c/glusterfs/+/22554/ +> BUG: 1659334 +> Signed-off-by: Xavi Hernandez + +Change-Id: Id5eaee7338729b9bc52c931815ca3ff1e5a7dcc8 +Fixes: bz#1702270 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/169325 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + libglusterfs/src/glusterfs/xlator.h | 2 + + libglusterfs/src/libglusterfs.sym | 1 + + libglusterfs/src/mem-pool.c | 193 ++++++++++++++++-------------------- + libglusterfs/src/xlator.c | 23 +++-- + 4 files changed, 105 insertions(+), 114 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 06152ec..8998976 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1035,6 +1035,8 @@ gf_boolean_t + loc_is_nameless(loc_t *loc); + int + xlator_mem_acct_init(xlator_t *xl, int num_types); ++void ++xlator_mem_acct_unref(struct mem_acct *mem_acct); + int + is_gf_log_command(xlator_t *trans, const char *name, char *value); + int +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index fa2025e..cf5757c 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1093,6 +1093,7 @@ xlator_foreach + xlator_foreach_depth_first + xlator_init + xlator_mem_acct_init ++xlator_mem_acct_unref + xlator_notify + xlator_option_info_list + xlator_option_init_bool +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 34cb87a..3934a78 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -35,61 +35,92 @@ gf_mem_acct_enable_set(void *data) + return; + } + +-int +-gf_mem_set_acct_info(xlator_t *xl, char **alloc_ptr, size_t size, uint32_t type, +- const char *typestr) ++static void * ++gf_mem_header_prepare(struct mem_header *header, size_t size) + { +- void *ptr = NULL; +- struct mem_header *header = NULL; ++ void *ptr; + +- if (!alloc_ptr) +- return -1; ++ header->size = size; + +- ptr = *alloc_ptr; ++ ptr = header + 1; + +- GF_ASSERT(xl != NULL); ++ /* data follows in this gap of 'size' bytes */ ++ *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC; + +- GF_ASSERT(xl->mem_acct != NULL); ++ return ptr; ++} + +- GF_ASSERT(type <= xl->mem_acct->num_types); ++static void * ++gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header, ++ size_t size, uint32_t type, const char *typestr) ++{ ++ struct mem_acct_rec *rec = NULL; ++ bool new_ref = false; + +- LOCK(&xl->mem_acct->rec[type].lock); +- { +- if (!xl->mem_acct->rec[type].typestr) +- xl->mem_acct->rec[type].typestr = typestr; +- xl->mem_acct->rec[type].size += size; +- 
xl->mem_acct->rec[type].num_allocs++; +- xl->mem_acct->rec[type].total_allocs++; +- xl->mem_acct->rec[type].max_size = max(xl->mem_acct->rec[type].max_size, +- xl->mem_acct->rec[type].size); +- xl->mem_acct->rec[type].max_num_allocs = max( +- xl->mem_acct->rec[type].max_num_allocs, +- xl->mem_acct->rec[type].num_allocs); +- } +- UNLOCK(&xl->mem_acct->rec[type].lock); ++ if (mem_acct != NULL) { ++ GF_ASSERT(type <= mem_acct->num_types); + +- GF_ATOMIC_INC(xl->mem_acct->refcnt); ++ rec = &mem_acct->rec[type]; ++ LOCK(&rec->lock); ++ { ++ if (!rec->typestr) { ++ rec->typestr = typestr; ++ } ++ rec->size += size; ++ new_ref = (rec->num_allocs == 0); ++ rec->num_allocs++; ++ rec->total_allocs++; ++ rec->max_size = max(rec->max_size, rec->size); ++ rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs); ++ ++#ifdef DEBUG ++ list_add(&header->acct_list, &rec->obj_list); ++#endif ++ } ++ UNLOCK(&rec->lock); ++ ++ /* We only take a reference for each memory type used, not for each ++ * allocation. This minimizes the use of atomic operations. */ ++ if (new_ref) { ++ GF_ATOMIC_INC(mem_acct->refcnt); ++ } ++ } + +- header = (struct mem_header *)ptr; + header->type = type; +- header->size = size; +- header->mem_acct = xl->mem_acct; ++ header->mem_acct = mem_acct; + header->magic = GF_MEM_HEADER_MAGIC; + ++ return gf_mem_header_prepare(header, size); ++} ++ ++static void * ++gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header, ++ size_t size) ++{ ++ struct mem_acct_rec *rec = NULL; ++ ++ if (mem_acct != NULL) { ++ rec = &mem_acct->rec[header->type]; ++ LOCK(&rec->lock); ++ { ++ rec->size += size - header->size; ++ rec->total_allocs++; ++ rec->max_size = max(rec->max_size, rec->size); ++ + #ifdef DEBUG +- INIT_LIST_HEAD(&header->acct_list); +- LOCK(&xl->mem_acct->rec[type].lock); +- { +- list_add(&header->acct_list, &(xl->mem_acct->rec[type].obj_list)); +- } +- UNLOCK(&xl->mem_acct->rec[type].lock); ++ /* The old 'header' already was present in 'obj_list', but ++ * realloc() could have changed its address. We need to remove ++ * the old item from the list and add the new one. This can be ++ * done this way because list_move() doesn't use the pointers ++ * to the old location (which are not valid anymore) already ++ * present in the list, it simply overwrites them. 
*/ ++ list_move(&header->acct_list, &rec->obj_list); + #endif +- ptr += sizeof(struct mem_header); +- /* data follows in this gap of 'size' bytes */ +- *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC; ++ } ++ UNLOCK(&rec->lock); ++ } + +- *alloc_ptr = ptr; +- return 0; ++ return gf_mem_header_prepare(header, size); + } + + void * +@@ -97,7 +128,7 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr) + { + size_t tot_size = 0; + size_t req_size = 0; +- char *ptr = NULL; ++ void *ptr = NULL; + xlator_t *xl = NULL; + + if (!THIS->ctx->mem_acct_enable) +@@ -114,16 +145,15 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr) + gf_msg_nomem("", GF_LOG_ALERT, tot_size); + return NULL; + } +- gf_mem_set_acct_info(xl, &ptr, req_size, type, typestr); + +- return (void *)ptr; ++ return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr); + } + + void * + __gf_malloc(size_t size, uint32_t type, const char *typestr) + { + size_t tot_size = 0; +- char *ptr = NULL; ++ void *ptr = NULL; + xlator_t *xl = NULL; + + if (!THIS->ctx->mem_acct_enable) +@@ -138,84 +168,32 @@ __gf_malloc(size_t size, uint32_t type, const char *typestr) + gf_msg_nomem("", GF_LOG_ALERT, tot_size); + return NULL; + } +- gf_mem_set_acct_info(xl, &ptr, size, type, typestr); + +- return (void *)ptr; ++ return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr); + } + + void * + __gf_realloc(void *ptr, size_t size) + { + size_t tot_size = 0; +- char *new_ptr; +- struct mem_header *old_header = NULL; +- struct mem_header *new_header = NULL; +- struct mem_header tmp_header; ++ struct mem_header *header = NULL; + + if (!THIS->ctx->mem_acct_enable) + return REALLOC(ptr, size); + + REQUIRE(NULL != ptr); + +- old_header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE); +- GF_ASSERT(old_header->magic == GF_MEM_HEADER_MAGIC); +- tmp_header = *old_header; +- +-#ifdef DEBUG +- int type = 0; +- size_t copy_size = 0; +- +- /* Making these changes for realloc is not straightforward. So +- * I am simulating realloc using calloc and free +- */ +- +- type = tmp_header.type; +- new_ptr = __gf_calloc(1, size, type, +- tmp_header.mem_acct->rec[type].typestr); +- if (new_ptr) { +- copy_size = (size > tmp_header.size) ? tmp_header.size : size; +- memcpy(new_ptr, ptr, copy_size); +- __gf_free(ptr); +- } +- +- /* This is not quite what the man page says should happen */ +- return new_ptr; +-#endif ++ header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE); ++ GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC); + + tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE; +- new_ptr = realloc(old_header, tot_size); +- if (!new_ptr) { ++ header = realloc(header, tot_size); ++ if (!header) { + gf_msg_nomem("", GF_LOG_ALERT, tot_size); + return NULL; + } + +- /* +- * We used to pass (char **)&ptr as the second +- * argument after the value of realloc was saved +- * in ptr, but the compiler warnings complained +- * about the casting to and forth from void ** to +- * char **. +- * TBD: it would be nice to adjust the memory accounting info here, +- * but calling gf_mem_set_acct_info here is wrong because it bumps +- * up counts as though this is a new allocation - which it's not. +- * The consequence of doing nothing here is only that the sizes will be +- * wrong, but at least the counts won't be. 
+- uint32_t type = 0; +- xlator_t *xl = NULL; +- type = header->type; +- xl = (xlator_t *) header->xlator; +- gf_mem_set_acct_info (xl, &new_ptr, size, type, NULL); +- */ +- +- new_header = (struct mem_header *)new_ptr; +- *new_header = tmp_header; +- new_header->size = size; +- +- new_ptr += sizeof(struct mem_header); +- /* data follows in this gap of 'size' bytes */ +- *(uint32_t *)(new_ptr + size) = GF_MEM_TRAILER_MAGIC; +- +- return (void *)new_ptr; ++ return gf_mem_update_acct_info(header->mem_acct, header, size); + } + + int +@@ -321,6 +299,7 @@ __gf_free(void *free_ptr) + void *ptr = NULL; + struct mem_acct *mem_acct; + struct mem_header *header = NULL; ++ bool last_ref = false; + + if (!THIS->ctx->mem_acct_enable) { + FREE(free_ptr); +@@ -352,16 +331,18 @@ __gf_free(void *free_ptr) + mem_acct->rec[header->type].num_allocs--; + /* If all the instances are freed up then ensure typestr is set + * to NULL */ +- if (!mem_acct->rec[header->type].num_allocs) ++ if (!mem_acct->rec[header->type].num_allocs) { ++ last_ref = true; + mem_acct->rec[header->type].typestr = NULL; ++ } + #ifdef DEBUG + list_del(&header->acct_list); + #endif + } + UNLOCK(&mem_acct->rec[header->type].lock); + +- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { +- FREE(mem_acct); ++ if (last_ref) { ++ xlator_mem_acct_unref(mem_acct); + } + + free: +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 5d6f8d2..022c3ed 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -736,6 +736,19 @@ xlator_mem_acct_init(xlator_t *xl, int num_types) + } + + void ++xlator_mem_acct_unref(struct mem_acct *mem_acct) ++{ ++ uint32_t i; ++ ++ if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { ++ for (i = 0; i < mem_acct->num_types; i++) { ++ LOCK_DESTROY(&(mem_acct->rec[i].lock)); ++ } ++ FREE(mem_acct); ++ } ++} ++ ++void + xlator_tree_fini(xlator_t *xl) + { + xlator_t *top = NULL; +@@ -766,7 +779,6 @@ xlator_list_destroy(xlator_list_t *list) + int + xlator_memrec_free(xlator_t *xl) + { +- uint32_t i = 0; + struct mem_acct *mem_acct = NULL; + + if (!xl) { +@@ -775,13 +787,8 @@ xlator_memrec_free(xlator_t *xl) + mem_acct = xl->mem_acct; + + if (mem_acct) { +- for (i = 0; i < mem_acct->num_types; i++) { +- LOCK_DESTROY(&(mem_acct->rec[i].lock)); +- } +- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { +- FREE(mem_acct); +- xl->mem_acct = NULL; +- } ++ xlator_mem_acct_unref(mem_acct); ++ xl->mem_acct = NULL; + } + + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch b/SOURCES/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch new file mode 100644 index 0000000..2bd360f --- /dev/null +++ b/SOURCES/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch @@ -0,0 +1,117 @@ +From 01bb17a0910a638e89a44a6da4b1359123940498 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham +Date: Wed, 17 Apr 2019 12:17:27 +0530 +Subject: [PATCH 130/141] tier/test: new-tier-cmds.t fails after a glusterd + restart + +Problem: new-tier-cmds.t does a restart of gluster processes and +after the restart the bricks and the tier process takes more +time than before to come online. This causes the detach start to +fail. + +Fix: Give it enough time to come online after the restart. 
+ +label: DOWNSTREAM ONLY + +Change-Id: I0f50b0bb77fe49ebd3a0292e190d0350d7994cfe +Signed-off-by: Hari Gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/168130 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/basic/tier/new-tier-cmds.t | 45 ++++++++++++++++++++++++++-------------- + tests/volume.rc | 8 +++++++ + 2 files changed, 37 insertions(+), 16 deletions(-) + +diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t +index b9c9390..92881ac 100644 +--- a/tests/basic/tier/new-tier-cmds.t ++++ b/tests/basic/tier/new-tier-cmds.t +@@ -19,14 +19,6 @@ function create_dist_tier_vol () { + TEST $CLI_1 volume tier $V0 attach replica 2 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3 $H1:$B1/${V0}_h4 $H2:$B2/${V0}_h5 $H3:$B3/${V0}_h6 + } + +-function tier_daemon_status { +- local _VAR=CLI_$1 +- local xpath_sel='//node[hostname="Tier Daemon"][path="localhost"]/status' +- ${!_VAR} --xml volume status $V0 \ +- | xmllint --xpath "$xpath_sel" - \ +- | sed -n '/.*\([0-9]*\).*/s//\1/p' +-} +- + function detach_xml_status { + $CLI_1 volume tier $V0 detach status --xml | sed -n \ + '/.*Detach tier status successful/p' | wc -l +@@ -70,7 +62,20 @@ TEST $glusterd_2; + EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; + + #after starting detach tier the detach tier status should display the status +-sleep 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b5 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h5 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b6 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h6 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_shd_count ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_tierd_count + $CLI_1 volume status + TEST $CLI_1 volume tier $V0 detach start + +@@ -91,13 +96,21 @@ EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; + EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_b2 + EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_h2 + +-# Parsing normal output doesn't work because of line-wrap issues on our +-# regression machines, and the version of xmllint there doesn't support --xpath +-# so we can't do it that way either. In short, there's no way for us to detect +-# when we can stop waiting, so we just have to wait the maximum time every time +-# and hope any failures will show up later in the script. 
+-sleep $PROCESS_UP_TIMEOUT +-#XPECT_WITHIN $PROCESS_UP_TIMEOUT 1 tier_daemon_status 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b5 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h5 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b6 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h6 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_shd_count ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_tierd_count ++$CLI_1 volume status + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status + +diff --git a/tests/volume.rc b/tests/volume.rc +index 289b197..b326098 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -719,6 +719,14 @@ function get_snapd_count { + ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l + } + ++function get_tierd_count { ++ ps auxww | grep glusterfs | grep tierd.pid | grep -v grep | wc -l ++} ++ ++function get_shd_count { ++ ps auxww | grep glusterfs | grep shd.pid | grep -v grep | wc -l ++} ++ + function drop_cache() { + case $OSTYPE in + Linux) +-- +1.8.3.1 + diff --git a/SOURCES/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch b/SOURCES/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch new file mode 100644 index 0000000..6238fb1 --- /dev/null +++ b/SOURCES/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch @@ -0,0 +1,113 @@ +From a0949929282529e0e866e074721c1bdfe3928c8c Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Thu, 11 Apr 2019 12:12:12 +0530 +Subject: [PATCH 131/141] tests/dht: Test that lookups are sent post brick up + +upstream: https://review.gluster.org/#/c/glusterfs/+/22545/ + +>Change-Id: I3556793c5e9d58cc6a08644b41dc5740fab2610b +>updates: bz#1628194 + +BUG:1704562 +Change-Id: Ie45331298902bd5268c56cb29a966d8246abfd6d +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/169592 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/basic/distribute/brick-down.t | 83 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 83 insertions(+) + create mode 100644 tests/basic/distribute/brick-down.t + +diff --git a/tests/basic/distribute/brick-down.t b/tests/basic/distribute/brick-down.t +new file mode 100644 +index 0000000..522ccc0 +--- /dev/null ++++ b/tests/basic/distribute/brick-down.t +@@ -0,0 +1,83 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../common-utils.rc ++. $(dirname $0)/../../dht.rc ++ ++# Test 1 overview: ++# ---------------- ++# Test whether lookups are sent after a brick comes up again ++# ++# 1. Create a 3 brick pure distribute volume ++# 2. Fuse mount the volume so the layout is set on the root ++# 3. Kill one brick and try to create a directory which hashes to that brick. 
++# It should fail with EIO. ++# 4. Restart the brick that was killed. ++# 5. Do not remount the volume. Try to create the same directory as in step 3. ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3} ++TEST $CLI volume start $V0 ++ ++# We want the lookup to reach DHT ++TEST $CLI volume set $V0 performance.stat-prefetch off ++ ++# Mount using FUSE and lookup the mount so a layout is set on the brick root ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++ ++ls $M0/ ++ ++TEST mkdir $M0/level1 ++ ++# Find a dirname that will hash to the brick we are going to kill ++hashed=$V0-client-1 ++TEST dht_first_filename_with_hashsubvol "$hashed" $M0 "dir-" ++roottestdir=$fn_return_val ++ ++hashed=$V0-client-1 ++TEST dht_first_filename_with_hashsubvol "$hashed" $M0/level1 "dir-" ++level1testdir=$fn_return_val ++ ++ ++TEST kill_brick $V0 $H0 $B0/$V0-2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-2 ++ ++TEST $CLI volume status $V0 ++ ++ ++# Unmount and mount the volume again so dht has an incomplete in memory layout ++ ++umount -f $M0 ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++ ++ ++mkdir $M0/$roottestdir ++TEST [ $? -ne 0 ] ++ ++mkdir $M0/level1/$level1testdir ++TEST [ $? -ne 0 ] ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-2 ++ ++#$CLI volume status ++ ++# It takes a while for the client to reconnect to the brick ++sleep 5 ++ ++ ++mkdir $M0/$roottestdir ++TEST [ $? -eq 0 ] ++ ++mkdir $M0/$level1/level1testdir ++TEST [ $? -eq 0 ] ++ ++# Cleanup ++cleanup ++ ++ +-- +1.8.3.1 + diff --git a/SOURCES/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch b/SOURCES/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch new file mode 100644 index 0000000..2c7888d --- /dev/null +++ b/SOURCES/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch @@ -0,0 +1,41 @@ +From 83d5ebd6ca68e319db86e310cf072888d0f0f1d1 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Wed, 8 May 2019 10:07:29 +0530 +Subject: [PATCH 132/141] glusterd: remove duplicate occurrence of + features.selinux from volume option table + +Label : DOWNSTREAM ONLY + +Change-Id: I0a49fece7a1fcbb9f3bbfe5806ec470aeb33ad70 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/169664 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 10aa2ae..e52de20 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3242,16 +3242,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "pages." + "The max value is 262144 pages i.e 1 GB and " + "the min value is 1000 pages i.e ~4 MB."}, +- {.key = VKEY_FEATURES_SELINUX, +- .voltype = "features/selinux", +- .type = NO_DOC, +- .value = "on", +- .op_version = GD_OP_VERSION_3_11_0, +- .description = "Convert security.selinux xattrs to " +- "trusted.gluster.selinux on the bricks. 
Recommended " +- "to have enabled when clients and/or bricks support " +- "SELinux."}, +- + #endif /* USE_GFDB */ + { + .key = "locks.trace", +-- +1.8.3.1 + diff --git a/SOURCES/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch b/SOURCES/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch new file mode 100644 index 0000000..88f4bd0 --- /dev/null +++ b/SOURCES/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch @@ -0,0 +1,62 @@ +From f1f27e5839dd99389bef65f79ea491e98e6935d2 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Tue, 23 Apr 2019 18:05:36 +0530 +Subject: [PATCH 133/141] glusterd: enable fips-mode-rchecksum for new volumes + +...during volume create if the cluster op-version is >=GD_OP_VERSION_7_0. + +This option itself was introduced in GD_OP_VERSION_4_0_0 via commit 6daa65356. +We missed enabling it by default for new volume creates in that commit. +If we are to do it now safely, we need to use op version +GD_OP_VERSION_7_0 and target it for release-7. + +Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/22609/ +BUG: 1706683 +Change-Id: I7c6d4a8abe0816367e7069cb5cad01744f04858f +fixes: bz#1706683 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/169443 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index da877aa..77aa705 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -1614,10 +1614,17 @@ brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + gf_boolean_t pgfid_feat = _gf_false; + char *value = NULL; + xlator_t *xl = NULL; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; + + if (!graph || !volinfo || !set_dict || !brickinfo) + goto out; + ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO("glusterd", priv, out); ++ + ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_QUOTA, &value); + if (value) { + ret = gf_string2boolean(value, "a_enabled); +@@ -1661,6 +1668,12 @@ brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + } + } + ++ if (priv->op_version >= GD_OP_VERSION_7_0) { ++ ret = xlator_set_fixed_option(xl, "fips-mode-rchecksum", "on"); ++ if (ret) { ++ goto out; ++ } ++ } + snprintf(tmpstr, sizeof(tmpstr), "%d", brickinfo->fs_share_count); + ret = xlator_set_fixed_option(xl, "shared-brick-count", tmpstr); + out: +-- +1.8.3.1 + diff --git a/SOURCES/0134-performance-write-behind-remove-request-from-wip-lis.patch b/SOURCES/0134-performance-write-behind-remove-request-from-wip-lis.patch new file mode 100644 index 0000000..d20ca09 --- /dev/null +++ b/SOURCES/0134-performance-write-behind-remove-request-from-wip-lis.patch @@ -0,0 +1,79 @@ +From 76127f4f8f3c2bf415f66a335e7b37670cb9bd84 Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Fri, 3 May 2019 10:14:48 +0530 +Subject: [PATCH 134/141] performance/write-behind: remove request from wip + list in wb_writev_cbk + +There is a race in the way O_DIRECT writes are handled. Assume two +overlapping write requests w1 and w2. + +* w1 is issued and is in wb_inode->wip queue as the response is still + pending from bricks. Also wb_request_unref in wb_do_winds is not yet + invoked. 
+
+ list_for_each_entry_safe (req, tmp, tasks, winds) {
+ list_del_init (&req->winds);
+
+ if (req->op_ret == -1) {
+ call_unwind_error_keep_stub (req->stub, req->op_ret,
+ req->op_errno);
+ } else {
+ call_resume_keep_stub (req->stub);
+ }
+
+ wb_request_unref (req);
+ }
+
+* w2 is issued and wb_process_queue is invoked. w2 is not picked up
+ for winding as w1 is still in wb_inode->wip. w1 is added to todo
+ list and wb_writev for w2 returns.
+
+* response to w1 is received and invokes wb_request_unref. Assume
+ wb_request_unref in wb_do_winds (see point 1) is not invoked
+ yet. Since there is one more refcount, wb_request_unref in
+ wb_writev_cbk of w1 doesn't remove w1 from wip.
+
+* wb_process_queue is invoked as part of wb_writev_cbk of w1. But, it
+ fails to wind w2 as w1 is still in wip.
+
+* wb_request_unref is invoked on w1 as part of wb_do_winds. w1 is
+ removed from all queues including wip.
+
+* After this point there is no invocation of wb_process_queue unless
+ a new request is issued from the application, causing w2 to hang
+ until the next request.
+
+This bug is similar to bz 1626780 and bz 1379655.
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22654/
+BUG: 1702686
+Change-Id: Iaa47437613591699d4c8ad18bc0b32de6affcc31
+fixes: bz#1702686
+Signed-off-by: Raghavendra G
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169552
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/performance/write-behind/src/write-behind.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
+index cf302bd..70e281a 100644
+--- a/xlators/performance/write-behind/src/write-behind.c
++++ b/xlators/performance/write-behind/src/write-behind.c
+@@ -1813,6 +1813,12 @@ wb_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ frame->local = NULL;
+ wb_inode = req->wb_inode;
+
++ LOCK(&req->wb_inode->lock);
++ {
++ list_del_init(&req->wip);
++ }
++ UNLOCK(&req->wb_inode->lock);
++
+ wb_request_unref(req);
+
+ /* requests could be pending while this was in progress */
+--
+1.8.3.1
+
diff --git a/SOURCES/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch b/SOURCES/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
new file mode 100644
index 0000000..e6d7889
--- /dev/null
+++ b/SOURCES/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
@@ -0,0 +1,56 @@
+From 677f575d2289285d2e553ddd610944856cb947db Mon Sep 17 00:00:00 2001
+From: Sunny Kumar
+Date: Fri, 10 May 2019 11:21:03 +0530
+Subject: [PATCH 135/141] geo-rep: fix incorrectly formatted authorized_keys
+
+There are two ways of creating the secret pem pub file during geo-rep
+setup.
+1. gluster-georep-sshkey generate
+2. gluster system:: execute gsec_create
+
+The patch below solves this problem for the `gluster-georep-sshkey
+generate` method.
+Patch link: https://review.gluster.org/#/c/glusterfs/+/22246/
+
+This patch is added to support the old way of creating the secret pem
+pub file, `gluster system:: execute gsec_create`.
+
+Problem: During Geo-rep setup, when creating an ssh authorized_keys
+ entry, the geo-rep setup inserts an extra space before the "ssh-rsa"
+ label. This gets flagged by an enterprise customer's security scan as
+ a security violation.
+Solution: Remove the extra space while creating the secret key.
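To illustrate the formatting problem (the key material and hostname below are
hypothetical, and the path assumes libexecdir expands to /usr/libexec), the
generated authorized_keys entry used to contain a double space before the key
type, which the security scan flags:

  command="/usr/libexec/glusterfs/gsyncd"  ssh-rsa AAAAB3Nza... root@primary

whereas a correctly formatted entry uses a single space:

  command="/usr/libexec/glusterfs/gsyncd" ssh-rsa AAAAB3Nza... root@primary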
+ +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22673/ + +>fixes: bz#1679401 +>Change-Id: I92ba7e25aaa5123dae9ebe2f3c68d14315aa5f0e +>Signed-off-by: Sunny Kumar + +BUG: 1671862 +Change-Id: I11e90c00a14a301a5d95e14b5e8984867e6ff893 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/169870 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/src/peer_gsec_create.in | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in +index 05c1638..6d4a484 100755 +--- a/geo-replication/src/peer_gsec_create.in ++++ b/geo-replication/src/peer_gsec_create.in +@@ -18,7 +18,7 @@ if [ "Xcontainer" = "X$1" ]; then + output1=`cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub` + output2=`cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub` + else +- output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub` +- output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" " "``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub` ++ output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub` ++ output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub` + fi + echo -e "$output1\n$output2" +-- +1.8.3.1 + diff --git a/SOURCES/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch b/SOURCES/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch new file mode 100644 index 0000000..403dcb3 --- /dev/null +++ b/SOURCES/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch @@ -0,0 +1,51 @@ +From c63346dab3e5da0605bf4ddaa314253f42892c9d Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Wed, 8 May 2019 12:13:59 +0530 +Subject: [PATCH 136/141] glusterd: fix inconsistent global option output in + volume get + +volume get all all | grep & volume get all | grep +dumps two different output value for cluster.brick-multiplex and +cluster.server-quorum-ratio + +>upstream patch : https://review.gluster.org/#/c/glusterfs/+/22680/ + +>Fixes: bz#1707700 +>Change-Id: Id131734e0502aa514b84768cf67fce3c22364eae +>Signed-off-by: Atin Mukherjee + +BUG: 1706776 +Change-Id: Id131734e0502aa514b84768cf67fce3c22364eae +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/169948 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index e52de20..4b32fb6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2906,7 +2906,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = 1}, + {.key = GLUSTERD_QUORUM_RATIO_KEY, + .voltype = "mgmt/glusterd", +- .value = "0", ++ .value = "51", + .op_version = 1}, + /* changelog translator - global tunables */ + {.key = "changelog.changelog", +@@ -3547,7 +3547,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + /* Brick multiplexing options */ + {.key = GLUSTERD_BRICK_MULTIPLEX_KEY, + .voltype = "mgmt/glusterd", +- .value = "off", ++ .value = "disable", + .op_version = GD_OP_VERSION_3_10_0, + .validate_fn = validate_boolean, + .type = 
GLOBAL_DOC, +-- +1.8.3.1 + diff --git a/SOURCES/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch b/SOURCES/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch new file mode 100644 index 0000000..24dd1db --- /dev/null +++ b/SOURCES/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch @@ -0,0 +1,160 @@ +From 646292b4f73bf1b506d034b85787f794963d7196 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Mon, 6 May 2019 23:35:08 +0530 +Subject: [PATCH 137/141] shd/glusterd: Serialize shd manager to prevent race + condition + +At the time of a glusterd restart, while doing a handshake +there is a possibility that multiple shd manager might get +executed. Because of this, there is a chance that multiple +shd get spawned during a glusterd restart + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22667/ + +>Change-Id: Ie20798441e07d7d7a93b7d38dfb924cea178a920 +>fixes: bz#1707081 +>Signed-off-by: Mohammed Rafi KC + +BUG: 1704851 +Change-Id: Ie20798441e07d7d7a93b7d38dfb924cea178a920 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/169947 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../serialize-shd-manager-glusterd-restart.t | 54 ++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ++++++ + xlators/mgmt/glusterd/src/glusterd.c | 1 + + xlators/mgmt/glusterd/src/glusterd.h | 3 ++ + 4 files changed, 72 insertions(+) + create mode 100644 tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t + +diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t +new file mode 100644 +index 0000000..3a27c2a +--- /dev/null ++++ b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t +@@ -0,0 +1,54 @@ ++#! /bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../cluster.rc ++ ++function check_peers { ++count=`$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l` ++echo $count ++} ++ ++function check_shd { ++ps aux | grep $1 | grep glustershd | wc -l ++} ++ ++cleanup ++ ++ ++TEST launch_cluster 6 ++ ++TESTS_EXPECTED_IN_LOOP=25 ++for i in $(seq 2 6); do ++ hostname="H$i" ++ TEST $CLI_1 peer probe ${!hostname} ++done ++ ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers; ++for i in $(seq 1 5); do ++ ++ TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i ++ TEST $CLI_1 volume start ${V0}_$i force ++ ++done ++ ++#kill a node ++TEST kill_node 3 ++ ++TEST $glusterd_3; ++EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3 ++ ++for i in $(seq 1 5); do ++ ++ TEST $CLI_1 volume stop ${V0}_$i ++ TEST $CLI_1 volume delete ${V0}_$i ++ ++done ++ ++for i in $(seq 1 6); do ++ hostname="H$i" ++ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname} ++done ++cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index a9eab42..75f9a07 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -254,14 +254,26 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + { + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *conf = NULL; ++ gf_boolean_t shd_restart = _gf_false; + ++ conf = THIS->private; + volinfo = data; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + if (volinfo) + glusterd_volinfo_ref(volinfo); + ++ while (conf->restart_shd) { ++ synclock_unlock(&conf->big_lock); ++ sleep(2); ++ synclock_lock(&conf->big_lock); ++ } ++ conf->restart_shd = _gf_true; ++ shd_restart = _gf_true; ++ + ret = glusterd_shdsvc_create_volfile(volinfo); + if (ret) + goto out; +@@ -310,6 +322,8 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + } + } + out: ++ if (shd_restart) ++ conf->restart_shd = _gf_false; + if (volinfo) + glusterd_volinfo_unref(volinfo); + if (ret) +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index c0973cb..6d7dd4a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1819,6 +1819,7 @@ init(xlator_t *this) + conf->rpc = rpc; + conf->uds_rpc = uds_rpc; + conf->gfs_mgmt = &gd_brick_prog; ++ conf->restart_shd = _gf_false; + this->private = conf; + /* conf->workdir and conf->rundir are smaller than PATH_MAX; gcc's + * snprintf checking will throw an error here if sprintf is used. 
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index bd9f509..2ea8560 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -222,6 +222,9 @@ typedef struct { + gf_atomic_t blockers; + uint32_t mgmt_v3_lock_timeout; + gf_boolean_t restart_bricks; ++ gf_boolean_t restart_shd; /* This flag prevents running two shd manager ++ simultaneously ++ */ + pthread_mutex_t attach_lock; /* Lock can be per process or a common one */ + pthread_mutex_t volume_lock; /* We release the big_lock from lot of places + which might lead the modification of volinfo +-- +1.8.3.1 + diff --git a/SOURCES/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch b/SOURCES/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch new file mode 100644 index 0000000..9b8bb86 --- /dev/null +++ b/SOURCES/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch @@ -0,0 +1,64 @@ +From d08083d057d6cc7136128cad6ecefba43b886c4c Mon Sep 17 00:00:00 2001 +From: Vishal Pandey +Date: Thu, 9 May 2019 14:37:22 +0530 +Subject: [PATCH 138/141] glusterd: Add gluster volume stop operation to + glusterd_validate_quorum() + +ISSUE: gluster volume stop succeeds even if quorum is not met. + +Fix: Add GD_OP_STOP_VOLUME to gluster_validate_quorum in +glusterd_mgmt_v3_pre_validate (). + +Since the volume stop command has been ported from synctask to mgmt_v3, +the quorum check was missed out. + +>upstream patch : https://review.gluster.org/#/c/glusterfs/+/22692/ + +>Change-Id: I7a634ad89ec2e286ea262d7952061efad5360042 +>fixes: bz#1690753 +>Signed-off-by: Vishal Pandey + +BUG: 1706893 +Change-Id: I7a634ad89ec2e286ea262d7952061efad5360042 +Signed-off-by: Vishal Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/169949 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/glusterd/quorum-validation.t | 4 +++- + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 2 +- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t +index 05aef4e..ff46729 100644 +--- a/tests/bugs/glusterd/quorum-validation.t ++++ b/tests/bugs/glusterd/quorum-validation.t +@@ -34,9 +34,11 @@ TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2 + TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start + TEST ! $CLI_1 volume set $V0 barrier enable + +-# Now execute a command which goes through op state machine and it should fail + TEST ! $CLI_1 volume profile $V0 start + ++#bug-1690753 - Volume stop when quorum not met is successful ++TEST ! 
$CLI_1 volume stop $V0 ++ + #Bring back the 2nd glusterd + TEST $glusterd_2 + +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index 61ad66e..ec78913 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -1059,7 +1059,7 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict, + goto out; + } + +- if (op == GD_OP_PROFILE_VOLUME) { ++ if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME) { + ret = glusterd_validate_quorum(this, op, req_dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET, +-- +1.8.3.1 + diff --git a/SOURCES/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch b/SOURCES/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch new file mode 100644 index 0000000..4f8ec9c --- /dev/null +++ b/SOURCES/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch @@ -0,0 +1,300 @@ +From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Mon, 29 Apr 2019 13:22:32 +0530 +Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec + fini + +We were not properly cleaning self-heal daemon resources +during ec fini. With shd multiplexing, it is absolutely +necessary to cleanup all the resources during ec fini. + +Back port of + upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/ + >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2 + >fixes: bz#1703948 + >Signed-off-by: Mohammed Rafi KC + +BUG: 1703434 +Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/169994 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/syncop-utils.c | 2 + + xlators/cluster/afr/src/afr-self-heald.c | 5 +++ + xlators/cluster/ec/src/ec-heald.c | 77 +++++++++++++++++++++++++++----- + xlators/cluster/ec/src/ec-heald.h | 3 ++ + xlators/cluster/ec/src/ec-messages.h | 3 +- + xlators/cluster/ec/src/ec.c | 47 +++++++++++++++++++ + 6 files changed, 124 insertions(+), 13 deletions(-) + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index b842142..4167db4 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + + if (frame) { + this = frame->this; ++ } else { ++ this = THIS; + } + + /*For this functionality to be implemented in general, we need +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 8bc4720..522fe5d 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + afr_private_t *priv = NULL; + + priv = this->private; ++ ++ if (this->cleanup_starting) { ++ return -ENOTCONN; ++ } ++ + if (!priv->shd.enabled) + return -EBUSY; + +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index cba111a..edf5e11 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -71,6 +71,11 @@ disabled_loop: + break; + } + ++ if (ec->shutdown) { ++ healer->running = _gf_false; ++ return -1; ++ } ++ + ret = healer->rerun; + healer->rerun = 0; + +@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer) + goto 
out; + } + ++ _mask_cancellation(); + ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD, + healer, ec_shd_index_heal, xdata, + ec->shd.max_threads, ec->shd.wait_qlength); ++ _unmask_cancellation(); + out: + if (xdata) + dict_unref(xdata); +@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + int ret = 0; + + ec = this->private; ++ ++ if (this->cleanup_starting) { ++ return -ENOTCONN; ++ } ++ + if (ec->xl_up_count <= ec->fragments) { + return -ENOTCONN; + } +@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode) + { + ec_t *ec = NULL; + loc_t loc = {0}; ++ int ret = -1; + + ec = healer->this->private; + loc.inode = inode; +- return syncop_ftw(ec->xl_list[healer->subvol], &loc, +- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal); ++ _mask_cancellation(); ++ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc, ++ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal); ++ _unmask_cancellation(); ++ return ret; + } + + void * +@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data) + { + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; ++ int run = 0; + + healer = data; + THIS = this = healer->this; + ec_t *ec = this->private; + + for (;;) { +- ec_shd_healer_wait(healer); ++ run = ec_shd_healer_wait(healer); ++ if (run == -1) ++ break; + + if (ec->xl_up_count > ec->fragments) { + gf_msg_debug(this->name, 0, "starting index sweep on subvol %s", +@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data) + + rootloc.inode = this->itable->root; + for (;;) { +- pthread_mutex_lock(&healer->mutex); +- { +- run = __ec_shd_healer_wait(healer); +- if (!run) +- healer->running = _gf_false; +- } +- pthread_mutex_unlock(&healer->mutex); +- +- if (!run) ++ run = ec_shd_healer_wait(healer); ++ if (run < 0) { + break; ++ } else if (run == 0) { ++ continue; ++ } + + if (ec->xl_up_count > ec->fragments) { + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START, +@@ -562,3 +577,41 @@ out: + dict_del(output, this->name); + return ret; + } ++ ++void ++ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer) ++{ ++ if (!healer) ++ return; ++ ++ pthread_cond_destroy(&healer->cond); ++ pthread_mutex_destroy(&healer->mutex); ++} ++ ++void ++ec_selfheal_daemon_fini(xlator_t *this) ++{ ++ struct subvol_healer *healer = NULL; ++ ec_self_heald_t *shd = NULL; ++ ec_t *priv = NULL; ++ int i = 0; ++ ++ priv = this->private; ++ if (!priv) ++ return; ++ ++ shd = &priv->shd; ++ if (!shd->iamshd) ++ return; ++ ++ for (i = 0; i < priv->nodes; i++) { ++ healer = &shd->index_healers[i]; ++ ec_destroy_healer_object(this, healer); ++ ++ healer = &shd->full_healers[i]; ++ ec_destroy_healer_object(this, healer); ++ } ++ ++ GF_FREE(shd->index_healers); ++ GF_FREE(shd->full_healers); ++} +diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h +index 2eda2a7..8184cf4 100644 +--- a/xlators/cluster/ec/src/ec-heald.h ++++ b/xlators/cluster/ec/src/ec-heald.h +@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this); + void + ec_shd_index_healer_wake(ec_t *ec); + ++void ++ec_selfheal_daemon_fini(xlator_t *this); ++ + #endif /* __EC_HEALD_H__ */ +diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h +index 7c28808..ce299bb 100644 +--- a/xlators/cluster/ec/src/ec-messages.h ++++ b/xlators/cluster/ec/src/ec-messages.h +@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL, + EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE, + 
EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
+ EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
+- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
++ EC_MSG_THREAD_CLEANUP_FAILED);
+
+ #endif /* !_EC_MESSAGES_H_ */
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 3c8013e..264582a 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
+ }
+
+ void
++ec_cleanup_healer_object(ec_t *ec)
++{
++ struct subvol_healer *healer = NULL;
++ ec_self_heald_t *shd = NULL;
++ void *res = NULL;
++ int i = 0;
++ gf_boolean_t is_join = _gf_false;
++
++ shd = &ec->shd;
++ if (!shd->iamshd)
++ return;
++
++ for (i = 0; i < ec->nodes; i++) {
++ healer = &shd->index_healers[i];
++ pthread_mutex_lock(&healer->mutex);
++ {
++ healer->rerun = 1;
++ if (healer->running) {
++ pthread_cond_signal(&healer->cond);
++ is_join = _gf_true;
++ }
++ }
++ pthread_mutex_unlock(&healer->mutex);
++ if (is_join) {
++ pthread_join(healer->thread, &res);
++ is_join = _gf_false;
++ }
++
++ healer = &shd->full_healers[i];
++ pthread_mutex_lock(&healer->mutex);
++ {
++ healer->rerun = 1;
++ if (healer->running) {
++ pthread_cond_signal(&healer->cond);
++ is_join = _gf_true;
++ }
++ }
++ pthread_mutex_unlock(&healer->mutex);
++ if (is_join) {
++ pthread_join(healer->thread, &res);
++ is_join = _gf_false;
++ }
++ }
++}
++void
+ ec_pending_fops_completed(ec_t *ec)
+ {
+ if (ec->shutdown) {
+@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ /* If there aren't pending fops running after we have waken up
+ * them, we immediately propagate the notification. */
+ propagate = ec_disable_delays(ec);
++ ec_cleanup_healer_object(ec);
+ goto unlock;
+ }
+
+@@ -759,6 +805,7 @@ failed:
+ void
+ fini(xlator_t *this)
+ {
++ ec_selfheal_daemon_fini(this);
+ __ec_destroy_private(this);
+ }
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch b/SOURCES/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch
new file mode 100644
index 0000000..b4b44e8
--- /dev/null
+++ b/SOURCES/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch
@@ -0,0 +1,40 @@
+From 40bd6e9c186adb427e136a84eaab631e6a6f5263 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K
+Date: Sun, 5 May 2019 21:17:24 +0530
+Subject: [PATCH 140/141] cluster/ec: Reopen shouldn't happen with O_TRUNC
+
+Problem:
+Doing re-open with O_TRUNC will truncate the fragment even when it is
+not needed, causing extra heals.
+
+Fix:
+At the time of re-open don't use O_TRUNC.
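As a minimal sketch of the idea (reopen_fragment is a hypothetical helper,
not the actual EC function): the fd's remembered open flags may still carry
O_TRUNC from the application's original open, so a re-open masks it out
before reusing them:

#include <fcntl.h>

/* Re-open a fragment with the fd's stored open flags, minus O_TRUNC,
 * so the re-open can never empty a fragment that already holds data. */
static int
reopen_fragment(const char *path, int stored_flags)
{
    return open(path, stored_flags & ~O_TRUNC);
}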
+ +Upstream-patch: https://review.gluster.org/c/glusterfs/+/22660/ + +fixes bz#1706549 +Change-Id: Idc6408968efaad897b95a5a52481c66e843d3fb8 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/169982 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 1454ae2..b1ba5e9 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -128,7 +128,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) + } else { + ec_open(fop->frame, fop->xl, need_open, + EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc, +- fop->fd->flags, fop->fd, NULL); ++ fop->fd->flags & (~O_TRUNC), fop->fd, NULL); + } + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0141-socket-ssl-fix-crl-handling.patch b/SOURCES/0141-socket-ssl-fix-crl-handling.patch new file mode 100644 index 0000000..4c51ad0 --- /dev/null +++ b/SOURCES/0141-socket-ssl-fix-crl-handling.patch @@ -0,0 +1,295 @@ +From e3020e43344ddbc32e62e06bbbf88a4f5d7cdc82 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Fri, 10 May 2019 11:13:45 +0530 +Subject: [PATCH 141/141] socket/ssl: fix crl handling + +Problem: +Just setting the path to the CRL directory in socket_init() wasn't working. + +Solution: +Need to use special API to retrieve and set X509_VERIFY_PARAM and set +the CRL checking flags explicitly. +Also, setting the CRL checking flags is a big pain, since the connection +is declared as failed if any CRL isn't found in the designated file or +directory. A comment has been added to the code appropriately. + +> Change-Id: I8a8ed2ddaf4b5eb974387d2f7b1a85c1ca39fe79 +> fixes: bz#1687326 +> Signed-off-by: Milind Changire +> (Cherry pick from commit 06fa261207f0f0625c52fa977b96e5875e9a91e0) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22334/) + +Change-Id: I0958e9890035fd376f1e1eafc1452caf3edd184b +BUG: 1583585 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/166458 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 2 + + rpc/rpc-transport/socket/src/socket.c | 110 ++++++++++++++++++++++++++++------ + rpc/rpc-transport/socket/src/socket.h | 2 + + tests/features/ssl-ciphers.t | 13 +++- + 4 files changed, 107 insertions(+), 20 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 3065077..0e11d4c 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -491,6 +491,8 @@ AC_CHECK_HEADERS([openssl/dh.h]) + + AC_CHECK_HEADERS([openssl/ecdh.h]) + ++AC_CHECK_LIB([ssl], [SSL_CTX_get0_param], [AC_DEFINE([HAVE_SSL_CTX_GET0_PARAM], [1], [define if found OpenSSL SSL_CTX_get0_param])]) ++ + dnl Math library + AC_CHECK_LIB([m], [pow], [MATH_LIB='-lm'], [MATH_LIB='']) + AC_SUBST(MATH_LIB) +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index f6de1d3..bf2fa71 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -308,8 +308,65 @@ out: + #define ssl_write_one(t, b, l) \ + ssl_do((t), (b), (l), (SSL_trinary_func *)SSL_write) + ++/* set crl verify flags only for server */ ++/* see man X509_VERIFY_PARAM_SET_FLAGS(3) ++ * X509_V_FLAG_CRL_CHECK enables CRL checking for the certificate chain ++ * leaf certificate. An error occurs if a suitable CRL cannot be found. 
++ * Since we're never going to revoke a gluster node cert, we better disable ++ * CRL check for server certs to avoid getting error and failed connection ++ * attempts. ++ */ ++static void ++ssl_clear_crl_verify_flags(SSL_CTX *ssl_ctx) ++{ ++#ifdef X509_V_FLAG_CRL_CHECK_ALL ++#ifdef HAVE_SSL_CTX_GET0_PARAM ++ X509_VERIFY_PARAM *vpm; ++ ++ vpm = SSL_CTX_get0_param(ssl_ctx); ++ if (vpm) { ++ X509_VERIFY_PARAM_clear_flags( ++ vpm, (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL)); ++ } ++#else ++ /* CRL verify flag need not be cleared for rhel6 kind of clients */ ++#endif ++#else ++ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL"); ++#endif ++ return; ++} ++ ++/* set crl verify flags only for server */ ++static void ++ssl_set_crl_verify_flags(SSL_CTX *ssl_ctx) ++{ ++#ifdef X509_V_FLAG_CRL_CHECK_ALL ++#ifdef HAVE_SSL_CTX_GET0_PARAM ++ X509_VERIFY_PARAM *vpm; ++ ++ vpm = SSL_CTX_get0_param(ssl_ctx); ++ if (vpm) { ++ unsigned long flags; ++ ++ flags = X509_VERIFY_PARAM_get_flags(vpm); ++ flags |= (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL); ++ X509_VERIFY_PARAM_set_flags(vpm, flags); ++ } ++#else ++ X509_STORE *x509store; ++ ++ x509store = SSL_CTX_get_cert_store(ssl_ctx); ++ X509_STORE_set_flags(x509store, ++ X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL); ++#endif ++#else ++ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL"); ++#endif ++} ++ + int +-ssl_setup_connection_prefix(rpc_transport_t *this) ++ssl_setup_connection_prefix(rpc_transport_t *this, gf_boolean_t server) + { + int ret = -1; + socket_private_t *priv = NULL; +@@ -332,6 +389,9 @@ ssl_setup_connection_prefix(rpc_transport_t *this) + priv->ssl_accepted = _gf_false; + priv->ssl_context_created = _gf_false; + ++ if (!server && priv->crl_path) ++ ssl_clear_crl_verify_flags(priv->ssl_ctx); ++ + priv->ssl_ssl = SSL_new(priv->ssl_ctx); + if (!priv->ssl_ssl) { + gf_log(this->name, GF_LOG_ERROR, "SSL_new failed"); +@@ -2664,7 +2724,7 @@ ssl_handle_server_connection_attempt(rpc_transport_t *this) + fd = priv->sock; + + if (!priv->ssl_context_created) { +- ret = ssl_setup_connection_prefix(this); ++ ret = ssl_setup_connection_prefix(this, _gf_true); + if (ret < 0) { + gf_log(this->name, GF_LOG_TRACE, + "> ssl_setup_connection_prefix() failed!"); +@@ -2718,7 +2778,7 @@ ssl_handle_client_connection_attempt(rpc_transport_t *this) + ret = -1; + } else { + if (!priv->ssl_context_created) { +- ret = ssl_setup_connection_prefix(this); ++ ret = ssl_setup_connection_prefix(this, _gf_false); + if (ret < 0) { + gf_log(this->name, GF_LOG_TRACE, + "> ssl_setup_connection_prefix() " +@@ -3085,7 +3145,30 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + gf_log(this->name, GF_LOG_TRACE, "XXX server:%s, client:%s", + new_trans->myinfo.identifier, new_trans->peerinfo.identifier); + ++ /* Make options available to local socket_init() to create new ++ * SSL_CTX per transport. A separate SSL_CTX per transport is ++ * required to avoid setting crl checking options for client ++ * connections. The verification options eventually get copied ++ * to the SSL object. Unfortunately, there's no way to identify ++ * whether socket_init() is being called after a client-side ++ * connect() or a server-side accept(). Although, we could pass ++ * a flag from the transport init() to the socket_init() and ++ * from this place, this doesn't identify the case where the ++ * server-side transport loading is done for the first time. ++ * Also, SSL doesn't apply for UNIX sockets. 
++ */ ++ if (new_sockaddr.ss_family != AF_UNIX) ++ new_trans->options = dict_ref(this->options); ++ new_trans->ctx = this->ctx; ++ + ret = socket_init(new_trans); ++ ++ /* reset options to NULL to avoid double free */ ++ if (new_sockaddr.ss_family != AF_UNIX) { ++ dict_unref(new_trans->options); ++ new_trans->options = NULL; ++ } ++ + if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "initialization of new_trans " +@@ -4150,7 +4233,6 @@ ssl_setup_connection_params(rpc_transport_t *this) + char *cipher_list = DEFAULT_CIPHER_LIST; + char *dh_param = DEFAULT_DH_PARAM; + char *ec_curve = DEFAULT_EC_CURVE; +- char *crl_path = NULL; + + priv = this->private; + +@@ -4192,6 +4274,7 @@ ssl_setup_connection_params(rpc_transport_t *this) + } + priv->ssl_ca_list = gf_strdup(priv->ssl_ca_list); + ++ optstr = NULL; + if (dict_get_str(this->options, SSL_CRL_PATH_OPT, &optstr) == 0) { + if (!priv->ssl_enabled) { + gf_log(this->name, GF_LOG_WARNING, +@@ -4199,9 +4282,9 @@ ssl_setup_connection_params(rpc_transport_t *this) + SSL_ENABLED_OPT); + } + if (strcasecmp(optstr, "NULL") == 0) +- crl_path = NULL; ++ priv->crl_path = NULL; + else +- crl_path = optstr; ++ priv->crl_path = gf_strdup(optstr); + } + + gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG, +@@ -4343,24 +4426,15 @@ ssl_setup_connection_params(rpc_transport_t *this) + } + + if (!SSL_CTX_load_verify_locations(priv->ssl_ctx, priv->ssl_ca_list, +- crl_path)) { ++ priv->crl_path)) { + gf_log(this->name, GF_LOG_ERROR, "could not load CA list"); + goto err; + } + + SSL_CTX_set_verify_depth(priv->ssl_ctx, cert_depth); + +- if (crl_path) { +-#ifdef X509_V_FLAG_CRL_CHECK_ALL +- X509_STORE *x509store; +- +- x509store = SSL_CTX_get_cert_store(priv->ssl_ctx); +- X509_STORE_set_flags( +- x509store, X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL); +-#else +- gf_log(this->name, GF_LOG_ERROR, +- "OpenSSL version does not support CRL"); +-#endif ++ if (priv->crl_path) { ++ ssl_set_crl_verify_flags(priv->ssl_ctx); + } + + priv->ssl_session_id = session_id++; +diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h +index e1ccae2..e7c0090 100644 +--- a/rpc/rpc-transport/socket/src/socket.h ++++ b/rpc/rpc-transport/socket/src/socket.h +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #ifdef HAVE_OPENSSL_DH_H + #include + #endif +@@ -246,6 +247,7 @@ typedef struct { + char *ssl_own_cert; + char *ssl_private_key; + char *ssl_ca_list; ++ char *crl_path; + int pipe[2]; + struct gf_sock_incoming incoming; + /* -1 = not connected. 0 = in progress. 1 = connected */ +diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t +index 563d37c..7e1e199 100644 +--- a/tests/features/ssl-ciphers.t ++++ b/tests/features/ssl-ciphers.t +@@ -175,8 +175,6 @@ BRICK_PORT=`brick_port $V0` + EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + # test revocation +-# no need to restart the volume since the options are used +-# by the client here. + TEST $CLI volume set $V0 ssl.crl-path $TMPDIR + EXPECT $TMPDIR volume_option $V0 ssl.crl-path + $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 +@@ -189,14 +187,25 @@ TEST openssl ca -batch -config $SSL_CFG -revoke $SSL_CERT 2>&1 + TEST openssl ca -config $SSL_CFG -gencrl -out $SSL_CRL 2>&1 + + # Failed once revoked ++# Although client fails to mount without restarting the server after crl-path ++# is set when no actual crl file is found on the client, it would also fail ++# when server is restarted for the same reason. 
Since the socket initialization ++# code is the same for client and server, the crl verification flags need to ++# be turned off for the client to avoid SSL searching for CRLs in the ++# ssl.crl-path. If no CRL files are found in the ssl.crl-path, SSL fails the ++# connect() attempt on the client. ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 + $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 + EXPECT "N" wait_mount $M0 + TEST ! test -f $TEST_FILE + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + + # Succeed with CRL disabled ++TEST $CLI volume stop $V0 + TEST $CLI volume set $V0 ssl.crl-path NULL + EXPECT NULL volume_option $V0 ssl.crl-path ++TEST $CLI volume start $V0 + $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 + EXPECT "Y" wait_mount $M0 + TEST test -f $TEST_FILE +-- +1.8.3.1 + diff --git a/SOURCES/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch b/SOURCES/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch new file mode 100644 index 0000000..ec6ed8a --- /dev/null +++ b/SOURCES/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch @@ -0,0 +1,36 @@ +From e44b75fdb86dcf759204816c873b4f9f4efbefa8 Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Tue, 21 May 2019 16:17:09 +0530 +Subject: [PATCH 142/169] lock: check null value of dict to avoid log flooding + +> updates: bz#1712322 +> Change-Id: I120a1d23506f9ebcf88c7ea2f2eff4978a61cf4a +> Signed-off-by: Susant Palai +(backport of fix https://review.gluster.org/#/c/glusterfs/+/22756/) + +BUG: bz#1704181 +Change-Id: I2a192236328ebb39666ffef1146df312c08a377d +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/171325 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/locks/src/posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 3f1c7a7..adb0df5 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -121,7 +121,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + + #define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \ + do { \ +- if (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK) || \ ++ if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \ + (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \ + inode_t *__inode = (loc ? 
loc->inode : fd->inode); \ + pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \ +-- +1.8.3.1 + diff --git a/SOURCES/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch b/SOURCES/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch new file mode 100644 index 0000000..fd25a69 --- /dev/null +++ b/SOURCES/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch @@ -0,0 +1,42 @@ +From 43fb1d9d3890c44108b466d308177428fb8217aa Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Mon, 27 May 2019 10:11:39 +0530 +Subject: [PATCH 143/169] packaging : Change the dependency on nfs-ganesha to + 2.7 for glusterfs-ganesha + +Change-Id: I16a3f32eddfcbf745d67de9dc7440e2fc6ef2315 +fixes: bz#1714078 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/171471 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 86a1527..ed58356 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -460,7 +460,7 @@ Summary: NFS-Ganesha configuration + Group: Applications/File + + Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: nfs-ganesha-gluster >= 2.4.1 ++Requires: nfs-ganesha-gluster >= 2.7.3 + Requires: pcs, dbus + %if ( 0%{?rhel} && 0%{?rhel} == 6 ) + Requires: cman, pacemaker, corosync +@@ -1933,6 +1933,9 @@ fi + %endif + + %changelog ++* Mon May 27 2019 Jiffin Tony Thottan ++- Change the dependency to 2.7.3 on nfs-ganesha for glusterfs-ganesha (#1714078) ++ + * Sun Apr 7 2019 Jiffin Tony Thottan + - DOWNSTREAM ONLY - revert of 83abcb(gnfs in an optional subpackage) + +-- +1.8.3.1 + diff --git a/SOURCES/0144-cluster-ec-honor-contention-notifications-for-partia.patch b/SOURCES/0144-cluster-ec-honor-contention-notifications-for-partia.patch new file mode 100644 index 0000000..40a6aa8 --- /dev/null +++ b/SOURCES/0144-cluster-ec-honor-contention-notifications-for-partia.patch @@ -0,0 +1,114 @@ +From ff8a74250209f4279f67dd89c3e57b2289a1b7d1 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Thu, 9 May 2019 11:07:18 +0200 +Subject: [PATCH 144/169] cluster/ec: honor contention notifications for + partially acquired locks + +EC was ignoring lock contention notifications received while a lock was +being acquired. When a lock is partially acquired (some bricks have +granted the lock but some others not yet) we can receive notifications +from acquired bricks, which should be honored, since we may not receive +more notifications after that. + +Since EC was ignoring them, once the lock was acquired, it was not +released until the eager-lock timeout, causing unnecessary delays on +other clients. + +This fix takes into consideration the notifications received before +having completed the full lock acquisition. After that, the lock will +be releaed as soon as possible. 
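To make the one-line fix easier to follow, here is a minimal standalone sketch of the idea; it is not the actual ec-common.c code, and every name in it (ec_lock_sketch_t, handle_contention, must_release_early) is invented for illustration. The point is that a contention notification has to be recorded even while acquisition is still in progress; once the !lock->acquired test is dropped, an early notification is no longer discarded and the lock can be released as soon as acquisition completes.

    /* Standalone sketch, not the ec-common.c code: all names here are
     * invented. It models why the notification handler must honor
     * contention even before the lock is fully acquired. */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        bool acquired; /* set once every brick has granted the lock */
        bool release;  /* set when another client signals contention */
    } ec_lock_sketch_t;

    /* After the fix: record contention regardless of acquisition
     * state. The old code bailed out here when !lock->acquired, which
     * is exactly how early notifications were lost. */
    static void
    handle_contention(ec_lock_sketch_t *lock)
    {
        lock->release = true;
    }

    /* Mirrors the patched condition: only lock->release matters now. */
    static bool
    must_release_early(const ec_lock_sketch_t *lock)
    {
        return lock->release;
    }

    int
    main(void)
    {
        ec_lock_sketch_t lock = {false, false};

        handle_contention(&lock); /* arrives while still acquiring */
        lock.acquired = true;     /* acquisition completes later */

        printf("release early: %s\n",
               must_release_early(&lock) ? "yes" : "no");
        return 0;
    }

Under the pre-fix condition the early notification above would have been ignored, and the lock would have been held for the full eager-lock timeout (10 seconds in the regression test below) even with other clients waiting.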
+ +Upstream patch: +> BUG: 1708156 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22690 +> Change-Id: I2a306dbdb29fb557dcab7788a258bd75d826cc12 +> Signed-off-by: Xavi Hernandez + +Fixes: bz#1703455 +Change-Id: I2a306dbdb29fb557dcab7788a258bd75d826cc12 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/171525 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + ...or-inodelk-contention-notify-on-partial-locks.t | 54 ++++++++++++++++++++++ + xlators/cluster/ec/src/ec-common.c | 2 +- + 2 files changed, 55 insertions(+), 1 deletion(-) + create mode 100644 tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t + +diff --git a/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t +new file mode 100644 +index 0000000..67fdb18 +--- /dev/null ++++ b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t +@@ -0,0 +1,54 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++function do_ls() { ++ local dir="${1}" ++ local i ++ ++ for i in {1..50}; do ++ ls -l $M0/${dir} >/dev/null & ++ ls -l $M1/${dir} >/dev/null & ++ ls -l $M2/${dir} >/dev/null & ++ ls -l $M3/${dir} >/dev/null & ++ done ++ wait ++} ++ ++function measure_time() { ++ { ++ LC_ALL=C ++ time -p "${@}" ++ } 2>&1 | awk '/^real/ { print $2 * 1000 }' ++} ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} ++ ++TEST $CLI volume set $V0 disperse.eager-lock on ++TEST $CLI volume set $V0 disperse.other-eager-lock on ++TEST $CLI volume set $V0 features.locks-notify-contention on ++TEST $CLI volume set $V0 disperse.eager-lock-timeout 10 ++TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 10 ++ ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M2 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M3 ++TEST mkdir $M0/dir ++TEST touch $M0/dir/file.{1..10} ++ ++# Run multiple 'ls' concurrently from multiple clients so that they collide and ++# cause partial locks. 
++TEST [[ $(measure_time do_ls dir) -lt 10000 ]] ++ ++cleanup +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index b1ba5e9..e85aa8b 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2497,7 +2497,7 @@ ec_lock_release(ec_t *ec, inode_t *inode) + goto done; + } + lock = ctx->inode_lock; +- if ((lock == NULL) || !lock->acquired || lock->release) { ++ if ((lock == NULL) || lock->release) { + goto done; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch b/SOURCES/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch new file mode 100644 index 0000000..398f460 --- /dev/null +++ b/SOURCES/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch @@ -0,0 +1,65 @@ +From 55d47524c0c8a88204129c3a94d71779aae00beb Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 28 May 2019 08:18:12 +0530 +Subject: [PATCH 145/169] core: Capture process memory usage at the time of + call gf_msg_nomem + +Problem: All gluster processes call gf_mgm_nomem while calloc/malloc/realloc + throw an error but the message does not capture current memory usage of + gluster process + +Solution: Call getrusage to capture current memory usage of gluster + process + +> Change-Id: I2e0319da1f33b177fa042fdc9e7268068576c9c3 +> fixes: bz#1708051 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22688/ +> Cherry pick from commit 8e1d53f14730ac1b1ca0ce9d9a0ccb32578fd4fb + +BUG: 1709087 +Change-Id: I2e0319da1f33b177fa042fdc9e7268068576c9c3 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/171587 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/logging.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c +index 5d46916..7f0eff6 100644 +--- a/libglusterfs/src/logging.c ++++ b/libglusterfs/src/logging.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #ifdef HAVE_BACKTRACE + #include +@@ -1196,6 +1197,7 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, + glusterfs_ctx_t *ctx = NULL; + int wlen = 0; + int priority; ++ struct rusage r_usage; + + this = THIS; + ctx = this->ctx; +@@ -1231,10 +1233,11 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, + "]" + " [%s:%d:%s] %s: no memory " + "available for size (%" GF_PRI_SIZET +- ")" ++ ") current memory usage in kilobytes %ld" + " [call stack follows]\n", + timestr, gf_level_strings[level], (uint64_t)0, basename, +- line, function, domain, size); ++ line, function, domain, size, ++ (!getrusage(RUSAGE_SELF, &r_usage) ? r_usage.ru_maxrss : 0)); + if (-1 == ret) { + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch b/SOURCES/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch new file mode 100644 index 0000000..50747cc --- /dev/null +++ b/SOURCES/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch @@ -0,0 +1,146 @@ +From 8cc721ee43ac8038eecb712278378710ad0745ed Mon Sep 17 00:00:00 2001 +From: root +Date: Sun, 7 Apr 2019 19:31:17 +0530 +Subject: [PATCH 146/169] dht: Custom xattrs are not healed in case of + add-brick + +Problem: If any custom xattrs are set on the directory before + add a brick, xattrs are not healed on the directory + after adding a brick. 
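As a side note on the gf_msg_nomem change in the patch just above: the following self-contained program (an illustration only, not glusterfs code) shows the same getrusage() idiom. On Linux, ru_maxrss is the peak resident set size in kilobytes, so the logged value is a high-water mark rather than the instantaneous usage, and a failing getrusage() degrades to reporting 0 exactly as the patched format arguments do.

    /* Illustration of the getrusage() call added to _gf_msg_nomem. */
    #include <stdio.h>
    #include <sys/resource.h>

    int
    main(void)
    {
        struct rusage r_usage;

        /* Same fallback as the patch: log 0 when getrusage() fails. */
        long rss_kb = !getrusage(RUSAGE_SELF, &r_usage)
                          ? r_usage.ru_maxrss
                          : 0;

        printf("current memory usage in kilobytes %ld\n", rss_kb);
        return 0;
    }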
+ +Solution: xattr are not healed because dht_selfheal_dir_mkdir_lookup_cbk + checks the value of MDS and if MDS value is not negative + selfheal code path does not take reference of MDS xattrs.Change the + condition to take reference of MDS xattr so that custom xattrs are + populated on newly added brick + +> Updates: bz#1702299 +> Change-Id: Id14beedb98cce6928055f294e1594b22132e811c +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit aa52259de7b50625b754ce9fb5c0f38e22d79dd6) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22520/) + +BUG: 1702298 +Change-Id: Id14beedb98cce6928055f294e1594b22132e811c +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/171591 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/bug-1702299.t | 67 ++++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-selfheal.c | 9 +---- + 2 files changed, 68 insertions(+), 8 deletions(-) + create mode 100644 tests/bugs/bug-1702299.t + +diff --git a/tests/bugs/bug-1702299.t b/tests/bugs/bug-1702299.t +new file mode 100644 +index 0000000..1cff2ed +--- /dev/null ++++ b/tests/bugs/bug-1702299.t +@@ -0,0 +1,67 @@ ++#!/bin/bash ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../dht.rc ++cleanup; ++ ++function get_getfattr { ++ local path=$1 ++ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//' ++} ++ ++function set_fattr { ++ for i in `seq 1 10` ++ do ++ setfattr -n user.foo -v "newabc" ./tmp${i} ++ if [ "$?" = "0" ] ++ then ++ succ=$((succ+1)) ++ else ++ fail=$((fail+1)) ++ fi ++ done ++} ++ ++ ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3} ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0; ++ ++cd $M0 ++TEST mkdir tmp{1..10} ++ ++succ=fail=0 ++## set user.foo xattr with value newabc after kill one brick ++set_fattr ++count=10 ++EXPECT "$succ" echo $count ++count=0 ++EXPECT "$fail" echo $count ++ ++cd - ++ ++# Add-brick ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5} ++ ++cd $M0 ++## At this point dht code will heal xattr on down brick only for those dirs ++## hashed subvol was up at the time of update xattr ++TEST stat ./tmp{1..10} ++ ++ ++## Count the user.foo xattr value with newabc on brick and compare with succ value ++count=`getfattr -n user.foo $B0/${V0}4/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l` ++EXPECT "$succ" echo $count ++ ++## Count the user.foo xattr value with newabc on brick and compare with succ value ++count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l` ++EXPECT "$succ" echo $count ++ ++ ++cd - ++TEST umount $M0 ++cleanup +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index 5420fca..f5dfff9 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -1310,12 +1310,8 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie, + int this_call_cnt = 0; + int missing_dirs = 0; + dht_layout_t *layout = NULL; +- dht_conf_t *conf = 0; + xlator_t *prev = 0; + loc_t *loc = NULL; +- int check_mds = 0; +- int errst = 0; +- int32_t mds_xattr_val[1] = {0}; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + int index = -1; + +@@ -1324,7 +1320,6 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie, + local = frame->local; + layout = 
local->layout; + loc = &local->loc; +- conf = this->private; + prev = cookie; + + if (!gf_uuid_is_null(local->gfid)) +@@ -1347,9 +1342,7 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie, + + if (!op_ret) { + dht_iatt_merge(this, &local->stbuf, stbuf); +- check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, +- mds_xattr_val, 1, &errst); +- if (dict_get(xattr, conf->mds_xattr_key) && check_mds && !errst) { ++ if (prev == local->mds_subvol) { + dict_unref(local->xattr); + local->xattr = dict_ref(xattr); + } +-- +1.8.3.1 + diff --git a/SOURCES/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch b/SOURCES/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch new file mode 100644 index 0000000..27f8a4e --- /dev/null +++ b/SOURCES/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch @@ -0,0 +1,80 @@ +From d7795a592883cfb01da76b6905a7c9eb1e912bef Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 28 May 2019 08:28:29 +0530 +Subject: [PATCH 147/169] glusterd: bulkvoldict thread is not handling all + volumes + +Problem: In commit ac70f66c5805e10b3a1072bd467918730c0aeeb4 I + missed one condition to populate volume dictionary in + multiple threads while brick_multiplex is enabled.Due + to that glusterd is not sending volume dictionary for + all volumes to peer. + +Solution: Update the condition in code as well as update test case + also to avoid the issue + +> Change-Id: I06522dbdfee4f7e995d9cc7b7098fdf35340dc52 +> fixes: bz#1711250 +> Cherry pick from commit 4a5fb52eb1c5387a0fb8bfa1253e5227c7c255e8 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22739/ + +BUG: 1711249 +Change-Id: I06522dbdfee4f7e995d9cc7b7098fdf35340dc52 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/171589 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/glusterd/bug-1699339.t | 16 ++++++++++------ + xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +- + 2 files changed, 11 insertions(+), 7 deletions(-) + +diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t +index 3e950f4..bb8d4f4 100644 +--- a/tests/bugs/glusterd/bug-1699339.t ++++ b/tests/bugs/glusterd/bug-1699339.t +@@ -52,18 +52,22 @@ done + + TEST kill_glusterd 1 + +-vol1=$(printf "%s-vol%02d" $V0 1) ++TESTS_EXPECTED_IN_LOOP=4 ++for i in `seq 1 3 15` ++do ++vol1=$(printf "%s-vol%02d" $V0 $i) + TEST $CLI_2 volume set $vol1 performance.readdir-ahead on +-vol2=$(printf "%s-vol%02d" $V0 2) +-TEST $CLI_2 volume set $vol2 performance.readdir-ahead on ++done + + # Bring back 1st glusterd + TEST $glusterd_1 + EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + ++TESTS_EXPECTED_IN_LOOP=4 ++for i in `seq 1 3 15` ++do ++vol1=$(printf "%s-vol%02d" $V0 $i) + EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead +- +-vol_name=$(printf "%s-vol%02d" $V0 2) +-EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead ++done + + cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index efa5a86..8f1525e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3542,7 +3542,7 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + if ((i + 1) != totthread) { + arg->end = ((i + 1) * vol_per_thread_limit); + } else { +- arg->end = ((i * vol_per_thread_limit) + endindex); ++ arg->end = (((i + 1) * vol_per_thread_limit) + endindex); + } + 
th_ret = gf_thread_create_detached( + &th_id, glusterd_add_bulk_volumes_create_thread, arg, +-- +1.8.3.1 + diff --git a/SOURCES/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch b/SOURCES/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch new file mode 100644 index 0000000..b1a5651 --- /dev/null +++ b/SOURCES/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch @@ -0,0 +1,70 @@ +From 92aadb6a5eeec75edf7f5a11a0ebd861dd85ca6b Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Mon, 20 May 2019 15:23:42 +0530 +Subject: [PATCH 148/169] cluster/dht: Lookup all files when processing + directory + +A rebalance process currently only looks up files +that it is supposed to migrate. This could cause issues +when lookup-optimize is enabled as the dir layout can be +updated with the commit hash before all files are looked up. +This is expecially problematic if one of the rebalance processes +fails to complete as clients will try to access files whose +linkto files might not have been created. +Each process will now lookup every file in the directory it is +processing. +Pros: Less likely that files will be inaccessible. +Cons: More lookup requests sent to the bricks and a potential +performance hit. +Note: this does not handle races such as when a layout is updated on disk +just as the create fop is sent by the client. + +upstream : https://review.gluster.org/#/c/glusterfs/+/22746/ + +>Change-Id: I22b55846effc08d3b827c3af9335229335f67fb8 +>fixes: bz#1711764 + +BUG#1714124 + +Change-Id: Ica6a9459befe53957f080001a2dda525b3b14d1c +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/172080 +Tested-by: RHGS Build Bot +Reviewed-by: Mohit Agrawal +--- + xlators/cluster/dht/src/dht-rebalance.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index efbe8a4..559f046 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -2741,12 +2741,6 @@ gf_defrag_migrate_single_file(void *opaque) + goto out; + } + +- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, +- entry->d_stat.ia_gfid)) { +- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); +- goto out; +- } +- + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); + + gf_uuid_copy(entry_loc.pargfid, loc->gfid); +@@ -2772,6 +2766,12 @@ gf_defrag_migrate_single_file(void *opaque) + goto out; + } + ++ if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, ++ entry->d_stat.ia_gfid)) { ++ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); ++ goto out; ++ } ++ + iatt_ptr = &iatt; + + hashed_subvol = dht_subvol_get_hashed(this, &entry_loc); +-- +1.8.3.1 + diff --git a/SOURCES/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch b/SOURCES/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch new file mode 100644 index 0000000..5caf3d4 --- /dev/null +++ b/SOURCES/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch @@ -0,0 +1,452 @@ +From 86eee7e829bb33cac9b611da511ecbd2f03fab25 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Fri, 17 May 2019 19:26:48 +0530 +Subject: [PATCH 149/169] glusterd: Optimize code to copy dictionary in + handshake code path + +Problem: While high no. 
of volumes are configured around 2000 + glusterd has bottleneck during handshake at the time + of copying dictionary + +Solution: To avoid the bottleneck serialize a dictionary instead + of copying key-value pair one by one + +> Change-Id: I9fb332f432e4f915bc3af8dcab38bed26bda2b9a +> fixes: bz#1711297 +> Cherry picked from commit f8f09178bb890924a8050b466cc2e7a0a30e35a7 +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22742/) + +BUG: 1711296 +Change-Id: I9fb332f432e4f915bc3af8dcab38bed26bda2b9a +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/172255 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + libglusterfs/src/dict.c | 6 +- + libglusterfs/src/glusterfs/dict.h | 6 + + libglusterfs/src/libglusterfs.sym | 1 + + xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 27 ++-- + xlators/mgmt/glusterd/src/glusterd-utils.c | 187 +++++++++++++++++++++++---- + xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +- + xlators/mgmt/glusterd/src/glusterd.h | 5 + + 7 files changed, 194 insertions(+), 41 deletions(-) + +diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c +index 4cd1fcf..6917df9 100644 +--- a/libglusterfs/src/dict.c ++++ b/libglusterfs/src/dict.c +@@ -2799,10 +2799,6 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) + * 4 4 4 + */ + +-#define DICT_HDR_LEN 4 +-#define DICT_DATA_HDR_KEY_LEN 4 +-#define DICT_DATA_HDR_VAL_LEN 4 +- + /** + * dict_serialized_length_lk - return the length of serialized dict. This + * procedure has to be called with this->lock held. +@@ -2812,7 +2808,7 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) + * : failure: -errno + */ + +-static int ++int + dict_serialized_length_lk(dict_t *this) + { + int ret = -EINVAL; +diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h +index 52b833f..022f564 100644 +--- a/libglusterfs/src/glusterfs/dict.h ++++ b/libglusterfs/src/glusterfs/dict.h +@@ -91,6 +91,9 @@ typedef struct _data_pair data_pair_t; + #define DICT_MAX_FLAGS 256 + #define DICT_FLAG_SET 1 + #define DICT_FLAG_CLEAR 0 ++#define DICT_HDR_LEN 4 ++#define DICT_DATA_HDR_KEY_LEN 4 ++#define DICT_DATA_HDR_VAL_LEN 4 + + struct _data { + char *data; +@@ -412,4 +415,7 @@ are_dicts_equal(dict_t *one, dict_t *two, + gf_boolean_t (*value_ignore)(char *k)); + int + dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result); ++ ++int ++dict_serialized_length_lk(dict_t *this); + #endif +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index cf5757c..ec474e7 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -405,6 +405,7 @@ dict_rename_key + dict_reset + dict_serialize + dict_serialized_length ++dict_serialized_length_lk + dict_serialize_value_with_delim + dict_set + dict_setn +diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +index 4ec9700..45f8f17 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +@@ -1528,11 +1528,9 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) + + RCU_READ_UNLOCK; + +- ret = glusterd_add_volumes_to_export_dict(&peer_data); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, +- "Unable to add list of volumes " +- "in the peer_data dict for handshake"); ++ peer_data = dict_new(); ++ if (!peer_data) { ++ errno = ENOMEM; + goto out; + } + +@@ -1563,10 +1561,23 @@ 
glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) + } + } + +- ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val, +- &req.vols.vols_len); +- if (ret) ++ /* Don't add any key-value in peer_data dictionary after call this function ++ */ ++ ret = glusterd_add_volumes_to_export_dict(peer_data, &req.vols.vols_val, ++ &req.vols.vols_len); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, ++ "Unable to add list of volumes " ++ "in the peer_data dict for handshake"); + goto out; ++ } ++ ++ if (!req.vols.vols_len) { ++ ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val, ++ &req.vols.vols_len); ++ if (ret) ++ goto out; ++ } + + ret = glusterd_submit_request( + peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_FRIEND_ADD, NULL, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 8f1525e..2bc4836 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3466,11 +3466,118 @@ out: + return NULL; + } + ++int ++glusterd_dict_searialize(dict_t *dict_arr[], int count, int totcount, char *buf) ++{ ++ int i = 0; ++ int32_t keylen = 0; ++ int64_t netword = 0; ++ data_pair_t *pair = NULL; ++ int dict_count = 0; ++ int ret = 0; ++ ++ netword = hton32(totcount); ++ memcpy(buf, &netword, sizeof(netword)); ++ buf += DICT_HDR_LEN; ++ ++ for (i = 0; i < count; i++) { ++ if (dict_arr[i]) { ++ dict_count = dict_arr[i]->count; ++ pair = dict_arr[i]->members_list; ++ while (dict_count) { ++ if (!pair) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, ++ LG_MSG_PAIRS_LESS_THAN_COUNT, ++ "less than count data pairs found!"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (!pair->key) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, ++ "pair->key is null!"); ++ ret = -1; ++ goto out; ++ } ++ ++ keylen = strlen(pair->key); ++ netword = hton32(keylen); ++ memcpy(buf, &netword, sizeof(netword)); ++ buf += DICT_DATA_HDR_KEY_LEN; ++ if (!pair->value) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, ++ "pair->value is null!"); ++ ret = -1; ++ goto out; ++ } ++ ++ netword = hton32(pair->value->len); ++ memcpy(buf, &netword, sizeof(netword)); ++ buf += DICT_DATA_HDR_VAL_LEN; ++ ++ memcpy(buf, pair->key, keylen); ++ buf += keylen; ++ *buf++ = '\0'; ++ ++ if (pair->value->data) { ++ memcpy(buf, pair->value->data, pair->value->len); ++ buf += pair->value->len; ++ } ++ ++ pair = pair->next; ++ dict_count--; ++ } ++ } ++ } ++ ++out: ++ for (i = 0; i < count; i++) { ++ if (dict_arr[i]) ++ dict_unref(dict_arr[i]); ++ } ++ return ret; ++} ++ ++int ++glusterd_dict_arr_serialize(dict_t *dict_arr[], int count, char **buf, ++ u_int *length) ++{ ++ ssize_t len = 0; ++ int i = 0; ++ int totcount = 0; ++ int ret = 0; ++ ++ for (i = 0; i < count; i++) { ++ if (dict_arr[i]) { ++ len += dict_serialized_length_lk(dict_arr[i]); ++ totcount += dict_arr[i]->count; ++ } ++ } ++ ++ // Subtract HDR_LEN except one dictionary ++ len = len - ((count - 1) * DICT_HDR_LEN); ++ ++ *buf = GF_MALLOC(len, gf_common_mt_char); ++ if (*buf == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ if (length != NULL) { ++ *length = len; ++ } ++ ++ ret = glusterd_dict_searialize(dict_arr, count, totcount, *buf); ++ ++out: ++ return ret; ++} ++ + int32_t +-glusterd_add_volumes_to_export_dict(dict_t **peer_data) ++glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, ++ u_int *length) + { + int32_t ret = -1; +- dict_t *dict = NULL; + dict_t *dict_arr[128] = { + 0, + }; +@@ 
-3496,10 +3603,6 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + priv = this->private; + GF_ASSERT(priv); + +- dict = dict_new(); +- if (!dict) +- goto out; +- + /* Count the total number of volumes */ + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++; + +@@ -3520,14 +3623,15 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; +- ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); ++ ret = glusterd_add_volume_to_dict(volinfo, peer_data, count, ++ "volume"); + if (ret) + goto out; + + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + continue; + +- ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, ++ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, peer_data, count, + "volume"); + if (ret) + goto out; +@@ -3569,34 +3673,34 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + + gf_log(this->name, GF_LOG_INFO, + "Finished dictionary popluation in all threads"); +- for (i = 0; i < totthread; i++) { +- dict_copy_with_ref(dict_arr[i], dict); +- dict_unref(dict_arr[i]); +- } +- gf_log(this->name, GF_LOG_INFO, +- "Finished merger of all dictionraies into single one"); + } + +- ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt); ++ ret = dict_set_int32n(peer_data, "count", SLEN("count"), volcnt); + if (ret) + goto out; + +- ctx.dict = dict; ++ ctx.dict = peer_data; + ctx.prefix = "global"; + ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "val"; + dict_foreach(priv->opts, _add_dict_to_prdict, &ctx); + ctx.opt_count--; +- ret = dict_set_int32n(dict, "global-opt-count", SLEN("global-opt-count"), +- ctx.opt_count); ++ ret = dict_set_int32n(peer_data, "global-opt-count", ++ SLEN("global-opt-count"), ctx.opt_count); + if (ret) + goto out; + +- *peer_data = dict; ++ if (totthread) { ++ gf_log(this->name, GF_LOG_INFO, ++ "Finished merger of all dictionraies into single one"); ++ dict_arr[totthread++] = peer_data; ++ ret = glusterd_dict_arr_serialize(dict_arr, totthread, buf, length); ++ gf_log(this->name, GF_LOG_INFO, ++ "Serialize dictionary data return is %d", ret); ++ } ++ + out: +- if (ret) +- dict_unref(dict); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +@@ -4940,6 +5044,7 @@ glusterd_import_friend_volumes_synctask(void *opaque) + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + dict_t *peer_data = NULL; ++ glusterd_friend_synctask_args_t *arg = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -4947,8 +5052,20 @@ glusterd_import_friend_volumes_synctask(void *opaque) + conf = this->private; + GF_ASSERT(conf); + +- peer_data = (dict_t *)opaque; +- GF_ASSERT(peer_data); ++ arg = opaque; ++ if (!arg) ++ goto out; ++ ++ peer_data = dict_new(); ++ if (!peer_data) { ++ goto out; ++ } ++ ++ ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data); ++ if (ret) { ++ errno = ENOMEM; ++ goto out; ++ } + + ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); + if (ret) +@@ -4980,6 +5097,11 @@ glusterd_import_friend_volumes_synctask(void *opaque) + out: + if (peer_data) + dict_unref(peer_data); ++ if (arg) { ++ if (arg->dict_buf) ++ GF_FREE(arg->dict_buf); ++ GF_FREE(arg); ++ } + + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +@@ -5146,7 +5268,7 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + gf_boolean_t update = _gf_false; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; +- dict_t *peer_data_copy = NULL; ++ 
glusterd_friend_synctask_args_t *arg = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -5188,12 +5310,23 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + * first brick to come up before attaching the subsequent bricks + * in case brick multiplexing is enabled + */ +- peer_data_copy = dict_copy_with_ref(peer_data, NULL); +- glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, +- peer_data_copy); ++ arg = GF_CALLOC(1, sizeof(*arg), gf_common_mt_char); ++ ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf, ++ &arg->dictlen); ++ if (ret < 0) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "dict_serialize failed while handling " ++ " import friend volume request"); ++ goto out; ++ } ++ ++ glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg); + } + + out: ++ if (ret && arg) { ++ GF_FREE(arg); ++ } + gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret, + *status); + return ret; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 3647c34..6ad8062 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -227,7 +227,8 @@ glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo, + gf_boolean_t construct_real_path); + + int32_t +-glusterd_add_volumes_to_export_dict(dict_t **peer_data); ++glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, ++ u_int *length); + + int32_t + glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 2ea8560..f96bca3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -240,6 +240,11 @@ typedef struct glusterd_add_dict_args { + int end; + } glusterd_add_dict_args_t; + ++typedef struct glusterd_friend_synctask_args { ++ char *dict_buf; ++ u_int dictlen; ++} glusterd_friend_synctask_args_t; ++ + typedef enum gf_brick_status { + GF_BRICK_STOPPED, + GF_BRICK_STARTED, +-- +1.8.3.1 + diff --git a/SOURCES/0150-libglusterfs-define-macros-needed-for-cloudsync.patch b/SOURCES/0150-libglusterfs-define-macros-needed-for-cloudsync.patch new file mode 100644 index 0000000..b5fbef2 --- /dev/null +++ b/SOURCES/0150-libglusterfs-define-macros-needed-for-cloudsync.patch @@ -0,0 +1,38 @@ +From 4c410d99792808b0c8deb601d50c66df19f73ca7 Mon Sep 17 00:00:00 2001 +From: Anuradha Talur +Date: Thu, 25 Oct 2018 17:23:10 -0400 +Subject: [PATCH 150/169] libglusterfs: define macros needed for cloudsync + +backport of patch: https://review.gluster.org/#/c/glusterfs/+/21585/ + +> Change-Id: Iec5ce7f17fbf899f881a58cd20c4c967e3b71668 +> fixes: bz#1642168 +> Signed-off-by: Anuradha Talur + +Change-Id: I79e5d955559acdec7cbeb8f35c8482b3b6ff8b0f +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/172189 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/glusterfs/glusterfs.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index fb727fc..516b497 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -364,6 +364,10 @@ enum gf_internal_fop_indicator { + } while (0) + + #define GF_CS_OBJECT_SIZE "trusted.glusterfs.cs.object_size" ++#define GF_CS_BLOCK_SIZE "trusted.glusterfs.cs.block_size" ++#define GF_CS_NUM_BLOCKS 
"trusted.glusterfs.cs.num_blocks" ++ ++#define GF_CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid" + + #define GF_CS_OBJECT_UPLOAD_COMPLETE "trusted.glusterfs.csou.complete" + #define GF_CS_OBJECT_REMOTE "trusted.glusterfs.cs.remote" +-- +1.8.3.1 + diff --git a/SOURCES/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch b/SOURCES/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch new file mode 100644 index 0000000..d95db3d --- /dev/null +++ b/SOURCES/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch @@ -0,0 +1,156 @@ +From bffdcce7119f3ed68694df918e504cc241502835 Mon Sep 17 00:00:00 2001 +From: Anuradha Talur +Date: Mon, 19 Nov 2018 17:57:18 -0800 +Subject: [PATCH 151/169] mgmt/glusterd: Make changes related to cloudsync + xlator + +1) The placement of cloudsync xlator has been changed +to make it shard xlator's child. If cloudsync has to +work with shard in the graph, it needs to be child of shard. + +backport of: https://review.gluster.org/#/c/glusterfs/+/21681/ + +> Change-Id: Ib55424fdcb7ce8edae9f19b8a6e3d3ba86c1f0c4 +> fixes: bz#1642168 +> Signed-off-by: Anuradha Talur + +Change-Id: I68fd43b2c559cc2d9f05e1ab19784b174233d690 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/172190 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/basic/glusterd/check-cloudsync-ancestry.t | 48 +++++++++++++++++++++++++ + tests/volume.rc | 21 +++++++++++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 24 ++++++------- + 3 files changed, 81 insertions(+), 12 deletions(-) + create mode 100644 tests/basic/glusterd/check-cloudsync-ancestry.t + +diff --git a/tests/basic/glusterd/check-cloudsync-ancestry.t b/tests/basic/glusterd/check-cloudsync-ancestry.t +new file mode 100644 +index 0000000..ff6ffee +--- /dev/null ++++ b/tests/basic/glusterd/check-cloudsync-ancestry.t +@@ -0,0 +1,48 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++# When shard and cloudsync xlators enabled on a volume, shard xlator ++# should be an ancestor of cloudsync. This testcase is to check this condition. 
++ ++cleanup; ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3 ++ ++volfile=$(gluster system:: getwd)"/vols/$V0/trusted-$V0.tcp-fuse.vol" ++ ++#Test that both shard and cloudsync are not loaded ++EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard ++EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync ++ ++#Enable shard and cloudsync in that order and check if volfile is correct ++TEST $CLI volume set $V0 shard on ++TEST $CLI volume set $V0 cloudsync on ++ ++#Test that both shard and cloudsync are loaded ++EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard ++EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync ++ ++EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync ++ ++#Disable shard and cloudsync ++TEST $CLI volume set $V0 shard off ++TEST $CLI volume set $V0 cloudsync off ++ ++#Test that both shard and cloudsync are not loaded ++EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard ++EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync ++ ++#Enable cloudsync and shard in that order and check if volfile is correct ++TEST $CLI volume set $V0 cloudsync on ++TEST $CLI volume set $V0 shard on ++ ++#Test that both shard and cloudsync are loaded ++EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard ++EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync ++ ++EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync ++ ++cleanup; +diff --git a/tests/volume.rc b/tests/volume.rc +index b326098..a0ea3b8 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -891,3 +891,24 @@ function check_changelog_op { + + $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l + } ++ ++function volgen_check_ancestry { ++ #Returns Y if ancestor_xl is an ancestor of $child_xl according to the volfile ++ local volfile="$1" ++ ++ local child_xl_type="$2" ++ local child_xl="$3" ++ ++ local ancestor_xl_type="$4" ++ local ancestor_xl="$5" ++ ++ child_linenum=$(awk '/type $child_xl_type\/$child_xl/ {print FNR}' $volfile) ++ ancestor_linenum=$(awk '/type $ancestor_xl_type\/$ancestor_xl/ {print FNR}' $volfile) ++ ++ if [ $child_linenum -lt $ancestor_linenum ]; ++ then ++ echo "Y" ++ else ++ echo "N" ++ fi ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 77aa705..8b58d40 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4360,6 +4360,18 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + "tcp", set_dict); + } + ++ ret = dict_get_str_boolean(set_dict, "features.cloudsync", _gf_false); ++ if (ret == -1) ++ goto out; ++ ++ if (ret) { ++ xl = volgen_graph_add(graph, "features/cloudsync", volname); ++ if (!xl) { ++ ret = -1; ++ goto out; ++ } ++ } ++ + ret = dict_get_str_boolean(set_dict, "features.shard", _gf_false); + if (ret == -1) + goto out; +@@ -4567,18 +4579,6 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + if (ret) + return -1; + +- ret = dict_get_str_boolean(set_dict, "features.cloudsync", _gf_false); +- if (ret == -1) +- goto out; +- +- if (ret) { +- xl = volgen_graph_add(graph, "features/cloudsync", volname); +- if (!xl) { +- ret = -1; +- goto out; +- } +- } +- + /* if the client is part of 'gfproxyd' server, then we need to keep the + volume name as 'gfproxyd-', 
for better portmapper options */ + subvol = volname; +-- +1.8.3.1 + diff --git a/SOURCES/0152-storage-posix-changes-with-respect-to-cloudsync.patch b/SOURCES/0152-storage-posix-changes-with-respect-to-cloudsync.patch new file mode 100644 index 0000000..1610009 --- /dev/null +++ b/SOURCES/0152-storage-posix-changes-with-respect-to-cloudsync.patch @@ -0,0 +1,403 @@ +From 10e9f850017d58fcd813ccce253784280326f1d0 Mon Sep 17 00:00:00 2001 +From: Anuradha Talur +Date: Tue, 20 Nov 2018 13:15:26 -0800 +Subject: [PATCH 152/169] storage/posix: changes with respect to cloudsync + +Main changes include logic to update iatt buf +with file size from extended attributes in posix +rather than having this logic in cloudsync xlator. + +backport of:https://review.gluster.org/#/c/glusterfs/+/21694/ + +> Change-Id: I44f5f8df7a01e496372557fe2f4eff368dbdaa33 +> fixes: bz#1642168 +> Signed-off-by: Anuradha Talur + +Change-Id: I34880d856fb3add4ce88d64021d08d95405fc1c1 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/172191 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/storage/posix/src/posix-entry-ops.c | 1 + + xlators/storage/posix/src/posix-helpers.c | 50 +++++++++ + xlators/storage/posix/src/posix-inode-fd-ops.c | 139 ++++++++++++++++++++++--- + xlators/storage/posix/src/posix.h | 2 + + 4 files changed, 177 insertions(+), 15 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index fbd83c4..b24a052 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -272,6 +272,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + } + } + ++ posix_update_iatt_buf(&buf, -1, real_path, xdata); + if (priv->update_pgfid_nlinks) { + if (!gf_uuid_is_null(loc->pargfid) && !IA_ISDIR(buf.ia_type)) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 37e33a9..d0fd45a 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -3453,3 +3453,53 @@ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno) + out: + return ret; + } ++ ++void ++posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) ++{ ++ int ret = 0; ++ char val[4096] = { ++ 0, ++ }; ++ ++ if (!xattr_req) ++ return; ++ ++ if (!(dict_getn(xattr_req, GF_CS_OBJECT_STATUS, ++ strlen(GF_CS_OBJECT_STATUS)))) ++ return; ++ ++ if (fd != -1) { ++ ret = sys_fgetxattr(fd, GF_CS_OBJECT_SIZE, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_size = atoll(val); ++ } else { ++ /* Safe to assume that the other 2 xattrs are also not set*/ ++ return; ++ } ++ ret = sys_fgetxattr(fd, GF_CS_BLOCK_SIZE, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_blksize = atoll(val); ++ } ++ ret = sys_fgetxattr(fd, GF_CS_NUM_BLOCKS, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_blocks = atoll(val); ++ } ++ } else { ++ ret = sys_lgetxattr(loc, GF_CS_OBJECT_SIZE, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_size = atoll(val); ++ } else { ++ /* Safe to assume that the other 2 xattrs are also not set*/ ++ return; ++ } ++ ret = sys_lgetxattr(loc, GF_CS_BLOCK_SIZE, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_blksize = atoll(val); ++ } ++ ret = sys_lgetxattr(loc, GF_CS_NUM_BLOCKS, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_blocks = atoll(val); ++ } ++ } ++} +diff --git 
a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 7dbbd3d..065fced 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -108,6 +108,63 @@ extern char *marker_xattrs[]; + static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, + NULL}; + ++void ++posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd, ++ char *loc) ++{ ++ int ret = 0; ++ uuid_t uuid; ++ ++ if (!(dict_getn(req, GF_CS_OBJECT_STATUS, strlen(GF_CS_OBJECT_STATUS)))) ++ return; ++ ++ if (!(*rsp)) { ++ *rsp = dict_new(); ++ if (!(*rsp)) { ++ return; ++ } ++ } ++ ++ if (fd != -1) { ++ if (dict_getn(req, GF_CS_XATTR_ARCHIVE_UUID, ++ strlen(GF_CS_XATTR_ARCHIVE_UUID))) { ++ ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); ++ if (ret > 0) { ++ ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, ++ true); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, ++ "%s: Failed to set " ++ "dictionary value for %s for fd %d", ++ uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd); ++ } ++ } else { ++ gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d", ++ GF_CS_XATTR_ARCHIVE_UUID, fd); ++ } ++ } ++ } else { ++ if (dict_getn(req, GF_CS_XATTR_ARCHIVE_UUID, ++ strlen(GF_CS_XATTR_ARCHIVE_UUID))) { ++ ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); ++ if (ret > 0) { ++ ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, ++ true); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, ++ "%s: Failed to set " ++ "dictionary value for %s for loc %s", ++ uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, loc); ++ } ++ } else { ++ gf_msg_debug(this->name, 0, "getxattr failed on %s for %s", ++ GF_CS_XATTR_ARCHIVE_UUID, loc); ++ } ++ } ++ } ++ return; ++} ++ + int32_t + posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +@@ -150,8 +207,11 @@ posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + + posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, + &xattr_rsp, _gf_true); ++ ++ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path); + } + ++ posix_update_iatt_buf(&buf, -1, real_path, xdata); + op_ret = 0; + + out: +@@ -422,6 +482,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + if (xdata) + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &statpost); ++ posix_update_iatt_buf(&statpre, -1, real_path, xdata); ++ posix_update_iatt_buf(&statpost, -1, real_path, xdata); + op_ret = 0; + + out: +@@ -898,6 +960,7 @@ posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + } + } + ++ posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata); + /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. + * If it fails, fall back to _posix_do_zerofill() and an optional fsync. 
+ */ +@@ -1366,6 +1429,7 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + } + } + ++ posix_update_iatt_buf(&prebuf, -1, real_path, xdata); + op_ret = sys_truncate(real_path, offset); + if (op_ret == -1) { + op_errno = errno; +@@ -1405,6 +1469,10 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + int32_t _fd = -1; + struct posix_fd *pfd = NULL; + struct posix_private *priv = NULL; ++ struct iatt preop = { ++ 0, ++ }; ++ dict_t *rsp_xdata = NULL; + struct iatt stbuf = { + 0, + }; +@@ -1471,6 +1539,18 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + pfd->flags = flags; + pfd->fd = _fd; + ++ if (xdata) { ++ op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); ++ if (op_ret == -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, ++ "pre-operation fstat failed on fd=%p", fd); ++ goto out; ++ } ++ ++ posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata, ++ &rsp_xdata, _gf_true); ++ } ++ + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, +@@ -1488,7 +1568,7 @@ out: + + SET_TO_OLD_FS_ID(); + +- STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, NULL); ++ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata); + + return 0; + } +@@ -1573,6 +1653,7 @@ posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + } + } + ++ posix_update_iatt_buf(&preop, _fd, NULL, xdata); + op_ret = sys_pread(_fd, iobuf->ptr, size, offset); + if (op_ret == -1) { + op_errno = errno; +@@ -1878,6 +1959,7 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + } + } + ++ posix_update_iatt_buf(&preop, _fd, NULL, xdata); + if (locked && write_append) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; +@@ -2531,10 +2613,8 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + 0, + }; + data_t *tdata = NULL; +- char stime[4096]; +- char sxattr[4096]; ++ char *cs_var = NULL; + gf_cs_obj_state state = -1; +- char remotepath[4096] = {0}; + int i = 0; + int len; + +@@ -2588,10 +2668,11 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + goto unlock; + } + +- sprintf(stime, "%" PRId64, tmp_stbuf.ia_mtime); ++ cs_var = alloca(4096); ++ sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime); + + /*TODO: may be should consider nano-second also */ +- if (strncmp(stime, tdata->data, tdata->len) != 0) { ++ if (strncmp(cs_var, tdata->data, tdata->len) > 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "mtime " + "passed is different from seen by file now." +@@ -2601,31 +2682,54 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + goto unlock; + } + +- len = sprintf(sxattr, "%" PRIu64, tmp_stbuf.ia_size); ++ len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size); + +- ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, sxattr, len, ++ ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len, + flags); + if (ret) { ++ op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE, + ret); ++ goto unlock; ++ } ++ ++ len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks); ++ ++ ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len, ++ flags); ++ if (ret) { + op_errno = errno; ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "setxattr failed. 
key %s err %d", GF_CS_NUM_BLOCKS, ret); + goto unlock; + } + ++ len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize); ++ ++ ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len, ++ flags); ++ if (ret) { ++ op_errno = errno; ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret); ++ goto unlock; ++ } ++ ++ memset(cs_var, 0, 4096); + if (loc->path[0] == '/') { + for (i = 1; i < strlen(loc->path); i++) { +- remotepath[i - 1] = loc->path[i]; ++ cs_var[i - 1] = loc->path[i]; + } + +- remotepath[i] = '\0'; +- gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", +- remotepath); ++ cs_var[i] = '\0'; ++ gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var); + } + +- ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, remotepath, +- strlen(loc->path), flags); ++ ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var, ++ strlen(cs_var), flags); + if (ret) { ++ op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "setxattr failed - %s" + " %d", +@@ -2635,13 +2739,14 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + + ret = sys_truncate(real_path, 0); + if (ret) { ++ op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "truncate failed - %s" + " %d", + GF_CS_OBJECT_SIZE, ret); +- op_errno = errno; + ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE); + if (ret) { ++ op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "removexattr " + "failed post processing- %s" +@@ -2659,6 +2764,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + } + unlock: + UNLOCK(&loc->inode->lock); ++ op_ret = ret; + goto out; + } + +@@ -4927,6 +5033,7 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + } + } + ++ posix_update_iatt_buf(&preop, _fd, NULL, xdata); + op_ret = sys_ftruncate(_fd, offset); + + if (op_ret == -1) { +@@ -5008,8 +5115,10 @@ posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + } ++ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL); + } + ++ posix_update_iatt_buf(&buf, _fd, NULL, xdata); + op_ret = 0; + + out: +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index d5ba08c..1da4d01 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -664,4 +664,6 @@ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + int + posix_spawn_ctx_janitor_thread(xlator_t *this); + ++void ++posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + #endif /* _POSIX_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0153-features-cloudsync-Added-some-new-functions.patch b/SOURCES/0153-features-cloudsync-Added-some-new-functions.patch new file mode 100644 index 0000000..d28a7a5 --- /dev/null +++ b/SOURCES/0153-features-cloudsync-Added-some-new-functions.patch @@ -0,0 +1,1077 @@ +From 90254e4ae9455fa0a126f83700978a9314eb79ea Mon Sep 17 00:00:00 2001 +From: Anuradha Talur +Date: Thu, 29 Nov 2018 12:54:21 -0800 +Subject: [PATCH 153/169] features/cloudsync : Added some new functions + +This patch contains the following changes: +1) Store ID info will now be stored in the inode ctx +2) Added new readv type where read is made directly + from the remote store. This choice is made by + volume set operation. +3) cs_forget() was missing. Added it. 
+ +backport of:https://review.gluster.org/#/c/glusterfs/+/21757/ + +> Change-Id: Ie3232b3d7ffb5313a03f011b0553b19793eedfa2 +> fixes: bz#1642168 +> Signed-off-by: Anuradha Talur + +Change-Id: I089e5a8c93049cf6bfabf011673796e38e78d7ee +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/172192 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/features/cloudsync/src/cloudsync-common.c | 16 + + xlators/features/cloudsync/src/cloudsync-common.h | 35 ++ + xlators/features/cloudsync/src/cloudsync-fops-c.py | 12 +- + .../features/cloudsync/src/cloudsync-mem-types.h | 1 + + xlators/features/cloudsync/src/cloudsync.c | 600 ++++++++++++++++++--- + xlators/features/cloudsync/src/cloudsync.h | 20 + + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 +- + 7 files changed, 597 insertions(+), 94 deletions(-) + +diff --git a/xlators/features/cloudsync/src/cloudsync-common.c b/xlators/features/cloudsync/src/cloudsync-common.c +index aee1f06..445a31b 100644 +--- a/xlators/features/cloudsync/src/cloudsync-common.c ++++ b/xlators/features/cloudsync/src/cloudsync-common.c +@@ -11,6 +11,20 @@ + #include "cloudsync-common.h" + + void ++cs_xattrinfo_wipe(cs_local_t *local) ++{ ++ if (local->xattrinfo.lxattr) { ++ if (local->xattrinfo.lxattr->file_path) ++ GF_FREE(local->xattrinfo.lxattr->file_path); ++ ++ if (local->xattrinfo.lxattr->volname) ++ GF_FREE(local->xattrinfo.lxattr->volname); ++ ++ GF_FREE(local->xattrinfo.lxattr); ++ } ++} ++ ++void + cs_local_wipe(xlator_t *this, cs_local_t *local) + { + if (!local) +@@ -40,5 +54,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local) + if (local->remotepath) + GF_FREE(local->remotepath); + ++ cs_xattrinfo_wipe(local); ++ + mem_put(local); + } +diff --git a/xlators/features/cloudsync/src/cloudsync-common.h b/xlators/features/cloudsync/src/cloudsync-common.h +index 7b3520c..11d2334 100644 +--- a/xlators/features/cloudsync/src/cloudsync-common.h ++++ b/xlators/features/cloudsync/src/cloudsync-common.h +@@ -14,9 +14,23 @@ + #include + #include + #include ++#include + #include "cloudsync-mem-types.h" + #include "cloudsync-messages.h" + ++typedef struct cs_loc_xattr { ++ char *file_path; ++ uuid_t uuid; ++ uuid_t gfid; ++ char *volname; ++} cs_loc_xattr_t; ++ ++typedef struct cs_size_xattr { ++ uint64_t size; ++ uint64_t blksize; ++ uint64_t blocks; ++} cs_size_xattr_t; ++ + typedef struct cs_local { + loc_t loc; + fd_t *fd; +@@ -34,10 +48,25 @@ typedef struct cs_local { + int call_cnt; + inode_t *inode; + char *remotepath; ++ ++ struct { ++ /* offset, flags and size are the information needed ++ * by read fop for remote read operation. These will be ++ * populated in cloudsync read fop, before being passed ++ * on to the plugin performing remote read. 
++ */ ++ off_t offset; ++ uint32_t flags; ++ size_t size; ++ cs_loc_xattr_t *lxattr; ++ } xattrinfo; ++ + } cs_local_t; + + typedef int (*fop_download_t)(call_frame_t *frame, void *config); + ++typedef int (*fop_remote_read_t)(call_frame_t *, void *); ++ + typedef void *(*store_init)(xlator_t *this); + + typedef int (*store_reconfigure)(xlator_t *this, dict_t *options); +@@ -48,6 +77,7 @@ struct cs_remote_stores { + char *name; /* store name */ + void *config; /* store related information */ + fop_download_t dlfop; /* store specific download function */ ++ fop_remote_read_t rdfop; /* store specific read function */ + store_init init; /* store init to initialize store config */ + store_reconfigure reconfigure; /* reconfigure store config */ + store_fini fini; +@@ -59,11 +89,15 @@ typedef struct cs_private { + struct cs_remote_stores *stores; + gf_boolean_t abortdl; + pthread_spinlock_t lock; ++ gf_boolean_t remote_read; + } cs_private_t; + + void + cs_local_wipe(xlator_t *this, cs_local_t *local); + ++void ++cs_xattrinfo_wipe(cs_local_t *local); ++ + #define CS_STACK_UNWIND(fop, frame, params...) \ + do { \ + cs_local_t *__local = NULL; \ +@@ -90,6 +124,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local); + + typedef struct store_methods { + int (*fop_download)(call_frame_t *frame, void *config); ++ int (*fop_remote_read)(call_frame_t *, void *); + /* return type should be the store config */ + void *(*fop_init)(xlator_t *this); + int (*fop_reconfigure)(xlator_t *this, dict_t *options); +diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py +index 3122bd3..a7a2201 100755 +--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py ++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py +@@ -137,15 +137,15 @@ cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } else { + __cs_inode_ctx_update (this, fd->inode, val); + gf_msg (this->name, GF_LOG_INFO, 0, 0, +- " state = %ld", val); ++ " state = %" PRIu64, val); + + if (local->call_cnt == 1 && + (val == GF_CS_REMOTE || + val == GF_CS_DOWNLOADING)) { + gf_msg (this->name, GF_LOG_INFO, 0, + 0, " will repair and download " +- "the file, current state : %ld", +- val); ++ "the file, current state : %" ++ PRIu64, val); + goto repair; + } else { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, +@@ -274,7 +274,7 @@ fd_ops = ['readv', 'writev', 'flush', 'fsync', 'fsyncdir', 'ftruncate', + # These are the current actual lists used to generate the code + + # The following list contains fops which are fd based that modifies data +-fd_data_modify_op_fop_template = ['readv', 'writev', 'flush', 'fsync', ++fd_data_modify_op_fop_template = ['writev', 'flush', 'fsync', + 'ftruncate', 'rchecksum', 'fallocate', + 'discard', 'zerofill', 'seek'] + +@@ -284,8 +284,8 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr', + 'getattr'] + + # These fops need a separate implementation +-special_fops = ['readdirp', 'statfs', 'setxattr', 'unlink', 'getxattr', +- 'truncate', 'fstat'] ++special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr', ++ 'truncate', 'fstat', 'readv'] + + def gen_defaults(): + for name in ops: +diff --git a/xlators/features/cloudsync/src/cloudsync-mem-types.h b/xlators/features/cloudsync/src/cloudsync-mem-types.h +index 9e6837a..2203464 100644 +--- a/xlators/features/cloudsync/src/cloudsync-mem-types.h ++++ b/xlators/features/cloudsync/src/cloudsync-mem-types.h +@@ -16,6 +16,7 @@ enum cs_mem_types_ { + gf_cs_mt_cs_private_t = gf_common_mt_end + 1, + 
gf_cs_mt_cs_remote_stores_t, + gf_cs_mt_cs_inode_ctx_t, ++ gf_cs_mt_cs_lxattr_t, + gf_cs_mt_end + }; + #endif /* __CLOUDSYNC_MEM_TYPES_H__ */ +diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c +index fbdcdf7..2240fc3 100644 +--- a/xlators/features/cloudsync/src/cloudsync.c ++++ b/xlators/features/cloudsync/src/cloudsync.c +@@ -16,6 +16,7 @@ + #include + #include "cloudsync-autogen-fops.h" + ++#include + #include + + void +@@ -72,6 +73,8 @@ cs_init(xlator_t *this) + + this->private = priv; + ++ GF_OPTION_INIT("cloudsync-remote-read", priv->remote_read, bool, out); ++ + /* temp workaround. Should be configurable through glusterd*/ + per_vol = _gf_true; + +@@ -135,6 +138,18 @@ cs_init(xlator_t *this) + + (void)dlerror(); + ++ if (priv->remote_read) { ++ priv->stores->rdfop = store_methods->fop_remote_read; ++ if (!priv->stores->rdfop) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "failed to get" ++ " read fop %s", ++ dlerror()); ++ ret = -1; ++ goto out; ++ } ++ } ++ + priv->stores->dlfop = store_methods->fop_download; + if (!priv->stores->dlfop) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, +@@ -196,6 +211,22 @@ out: + return ret; + } + ++int ++cs_forget(xlator_t *this, inode_t *inode) ++{ ++ uint64_t ctx_int = 0; ++ cs_inode_ctx_t *ctx = NULL; ++ ++ inode_ctx_del(inode, this, &ctx_int); ++ if (!ctx_int) ++ return 0; ++ ++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctx_int; ++ ++ GF_FREE(ctx); ++ return 0; ++} ++ + void + cs_fini(xlator_t *this) + { +@@ -217,6 +248,9 @@ cs_reconfigure(xlator_t *this, dict_t *options) + goto out; + } + ++ GF_OPTION_RECONF("cloudsync-remote-read", priv->remote_read, options, bool, ++ out); ++ + /* needed only for per volume configuration*/ + ret = priv->stores->reconfigure(this, options); + +@@ -242,59 +276,6 @@ out: + } + + int32_t +-cs_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, +- dict_t *xdata) +-{ +- gf_dirent_t *tmp = NULL; +- char *sxattr = NULL; +- uint64_t ia_size = 0; +- int ret = 0; +- +- list_for_each_entry(tmp, &entries->list, list) +- { +- ret = dict_get_str(tmp->dict, GF_CS_OBJECT_SIZE, &sxattr); +- if (ret) { +- gf_msg_trace(this->name, 0, "size xattr found"); +- continue; +- } +- +- ia_size = atoll(sxattr); +- tmp->d_stat.ia_size = ia_size; +- } +- +- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); +- return 0; +-} +- +-int32_t +-cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t off, dict_t *xdata) +-{ +- int ret = 0; +- int op_errno = ENOMEM; +- +- if (!xdata) { +- xdata = dict_new(); +- if (!xdata) { +- goto err; +- } +- } +- +- ret = dict_set_int32(xdata, GF_CS_OBJECT_SIZE, 1); +- if (ret) { +- goto err; +- } +- +- STACK_WIND(frame, cs_readdirp_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); +- return 0; +-err: +- STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL); +- return 0; +-} +- +-int32_t + cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +@@ -305,7 +286,6 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + local = frame->local; + +- /* Do we need lock here? 
*/ + local->call_cnt++; + + if (op_ret == -1) { +@@ -320,13 +300,13 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto unwind; + } else { + __cs_inode_ctx_update(this, local->loc.inode, val); +- gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %ld", val); ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val); + + if (local->call_cnt == 1 && + (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "will repair and download " +- "the file, current state : %ld", ++ "the file, current state : %" PRIu64, + val); + goto repair; + } else { +@@ -665,7 +645,7 @@ cs_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + if (op_ret == 0) { + ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); + if (!ret) { +- gf_msg_debug(this->name, 0, "state %ld", val); ++ gf_msg_debug(this->name, 0, "state %" PRIu64, val); + ret = __cs_inode_ctx_update(this, fd->inode, val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); +@@ -831,7 +811,7 @@ out: + return 0; + } + +-void * ++int + cs_download_task(void *arg) + { + call_frame_t *frame = NULL; +@@ -842,7 +822,6 @@ cs_download_task(void *arg) + fd_t *fd = NULL; + cs_local_t *local = NULL; + dict_t *dict = NULL; +- int *retval = NULL; + + frame = (call_frame_t *)arg; + +@@ -850,13 +829,6 @@ cs_download_task(void *arg) + + priv = this->private; + +- retval = GF_CALLOC(1, sizeof(int), gf_common_mt_int); +- if (!retval) { +- gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); +- ret = -1; +- goto out; +- } +- + if (!priv->stores) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "No remote store " +@@ -972,20 +944,13 @@ out: + local->dlfd = NULL; + } + +- if (retval) { +- *retval = ret; +- pthread_exit(retval); +- } else { +- pthread_exit(&ret); +- } ++ return ret; + } + + int + cs_download(call_frame_t *frame) + { +- int *retval = NULL; + int ret = 0; +- pthread_t dthread; + cs_local_t *local = NULL; + xlator_t *this = NULL; + +@@ -1000,16 +965,406 @@ cs_download(call_frame_t *frame) + goto out; + } + +- ret = gf_thread_create(&dthread, NULL, &cs_download_task, (void *)frame, +- "downloadthread"); ++ ret = cs_download_task((void *)frame); ++out: ++ return ret; ++} + +- pthread_join(dthread, (void **)&retval); ++int ++cs_set_xattr_req(call_frame_t *frame) ++{ ++ cs_local_t *local = NULL; ++ GF_UNUSED int ret = 0; ++ ++ local = frame->local; ++ ++ /* When remote reads are performed (i.e. reads on remote store), ++ * there needs to be a way to associate a file on gluster volume ++ * with its correspnding file on the remote store. In order to do ++ * that, a unique key can be maintained as an xattr ++ * (GF_CS_XATTR_ARCHIVE_UUID)on the stub file on gluster bricks. ++ * This xattr should be provided to the plugin to ++ * perform the read fop on the correct file. This assumes that the file ++ * hierarchy and name need not be the same on remote store as that of ++ * the gluster volume. 
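++ *
++ * Requesting the key here makes the stored uuid come back in the
++ * stat-check reply dict, from which cs_update_xattrs() below copies
++ * it into local->xattrinfo.lxattr for the plugin to consume.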
++ */ ++ ret = dict_set_str(local->xattr_req, GF_CS_XATTR_ARCHIVE_UUID, "1"); ++ ++ return 0; ++} + +- ret = *retval; ++int ++cs_update_xattrs(call_frame_t *frame, dict_t *xdata) ++{ ++ cs_local_t *local = NULL; ++ xlator_t *this = NULL; ++ int size = -1; ++ GF_UNUSED int ret = 0; ++ ++ local = frame->local; ++ this = frame->this; ++ ++ local->xattrinfo.lxattr = GF_CALLOC(1, sizeof(cs_loc_xattr_t), ++ gf_cs_mt_cs_lxattr_t); ++ if (!local->xattrinfo.lxattr) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ gf_uuid_copy(local->xattrinfo.lxattr->gfid, local->loc.gfid); ++ ++ if (local->remotepath) { ++ local->xattrinfo.lxattr->file_path = gf_strdup(local->remotepath); ++ if (!local->xattrinfo.lxattr->file_path) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ } ++ ++ ret = dict_get_gfuuid(xdata, GF_CS_XATTR_ARCHIVE_UUID, ++ &(local->xattrinfo.lxattr->uuid)); ++ ++ if (ret) { ++ gf_uuid_clear(local->xattrinfo.lxattr->uuid); ++ } ++ size = strlen(this->name) - strlen("-cloudsync") + 1; ++ local->xattrinfo.lxattr->volname = GF_CALLOC(1, size, gf_common_mt_char); ++ if (!local->xattrinfo.lxattr->volname) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ strncpy(local->xattrinfo.lxattr->volname, this->name, size - 1); ++ local->xattrinfo.lxattr->volname[size - 1] = '\0'; ++ ++ return 0; ++err: ++ cs_xattrinfo_wipe(local); ++ return -1; ++} ++ ++int ++cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags) ++{ ++ xlator_t *this = NULL; ++ cs_private_t *priv = NULL; ++ int ret = -1; ++ fd_t *fd = NULL; ++ cs_local_t *local = NULL; ++ ++ local = frame->local; ++ this = frame->this; ++ priv = this->private; ++ ++ if (!local->remotepath) { ++ ret = -1; ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "remote path not" ++ " available. 
Check posix logs to resolve"); ++ goto out; ++ } ++ ++ if (!priv->stores) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "No remote store " ++ "plugins found"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (local->fd) { ++ fd = fd_anonymous(local->fd->inode); ++ } else { ++ fd = fd_anonymous(local->loc.inode); ++ } ++ ++ local->xattrinfo.size = size; ++ local->xattrinfo.offset = offset; ++ local->xattrinfo.flags = flags; ++ ++ if (!fd) { ++ gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed"); ++ ret = -1; ++ goto out; ++ } ++ ++ local->dlfd = fd; ++ local->dloffset = offset; ++ ++ /*this calling method is for per volume setting */ ++ ret = priv->stores->rdfop(frame, priv->stores->config); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "read failed" ++ ", remotepath: %s", ++ local->remotepath); ++ ret = -1; ++ goto out; ++ } else { ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, ++ "read success, path" ++ " : %s", ++ local->remotepath); ++ } + + out: +- if (retval) +- GF_FREE(retval); ++ if (fd) { ++ fd_unref(fd); ++ local->dlfd = NULL; ++ } ++ return ret; ++} ++ ++int32_t ++cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iovec *vector, int32_t count, ++ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) ++{ ++ cs_local_t *local = NULL; ++ int ret = 0; ++ uint64_t val = 0; ++ fd_t *fd = NULL; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ local->call_cnt++; ++ ++ if (op_ret == -1) { ++ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); ++ if (ret == 0) { ++ if (val == GF_CS_ERROR) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "could not get file state, unwinding"); ++ op_ret = -1; ++ op_errno = EIO; ++ goto unwind; ++ } else { ++ __cs_inode_ctx_update(this, fd->inode, val); ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val); ++ ++ if (local->call_cnt == 1 && ++ (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, ++ " will read from remote : %" PRIu64, val); ++ goto repair; ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "second readv, Unwinding"); ++ goto unwind; ++ } ++ } ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "file state " ++ "could not be figured, unwinding"); ++ goto unwind; ++ } ++ } else { ++ /* successful readv => file is local */ ++ __cs_inode_ctx_update(this, fd->inode, GF_CS_LOCAL); ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, ++ "state : GF_CS_LOCAL" ++ ", readv successful"); ++ ++ goto unwind; ++ } ++ ++repair: ++ ret = locate_and_execute(frame); ++ if (ret) { ++ goto unwind; ++ } ++ ++ return 0; ++ ++unwind: ++ CS_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf, ++ iobref, xdata); ++ ++ return 0; ++} ++ ++int32_t ++cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata) ++{ ++ int ret = 0; ++ ++ ret = cs_resume_postprocess(this, frame, fd->inode); ++ if (ret) { ++ goto unwind; ++ } ++ ++ cs_inodelk_unlock(frame); ++ ++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); ++ ++ return 0; ++ ++unwind: ++ cs_inodelk_unlock(frame); ++ ++ cs_common_cbk(frame); ++ ++ return 0; ++} ++ ++int32_t ++cs_resume_remote_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ size_t size, off_t offset, uint32_t flags, dict_t *xdata) ++{ ++ int ret = 0; ++ cs_local_t *local = NULL; ++ gf_cs_obj_state state = -1; ++ cs_inode_ctx_t *ctx = NULL; ++ ++ cs_inodelk_unlock(frame); ++ ++ 
local = frame->local; ++ if (!local) { ++ ret = -1; ++ goto unwind; ++ } ++ ++ __cs_inode_ctx_get(this, fd->inode, &ctx); ++ ++ state = __cs_get_file_state(this, fd->inode, ctx); ++ if (state == GF_CS_ERROR) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "status is GF_CS_ERROR." ++ " Aborting readv"); ++ local->op_ret = -1; ++ local->op_errno = EREMOTE; ++ ret = -1; ++ goto unwind; ++ } ++ ++ /* Serve readv from remote store only if it is remote. */ ++ gf_msg_debug(this->name, 0, "status of file %s is %d", ++ local->remotepath ? local->remotepath : "", state); ++ ++ /* We will reach this condition if local inode ctx had REMOTE ++ * state when the control was in cs_readv but after stat ++ * we got an updated state saying that the file is LOCAL. ++ */ ++ if (state == GF_CS_LOCAL) { ++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, ++ xdata); ++ } else if (state == GF_CS_REMOTE) { ++ ret = cs_resume_remote_readv_postprocess(this, frame, fd->inode, offset, ++ size, flags); ++ /* Failed to submit the remote readv fop to plugin */ ++ if (ret) { ++ local->op_ret = -1; ++ local->op_errno = EREMOTE; ++ goto unwind; ++ } ++ /* When the file is in any other intermediate state, ++ * we should not perform remote reads. ++ */ ++ } else { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ ++ return 0; ++ ++unwind: ++ cs_common_cbk(frame); ++ ++ return 0; ++} ++ ++int32_t ++cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata) ++{ ++ int op_errno = -1; ++ cs_local_t *local = NULL; ++ int ret = 0; ++ cs_inode_ctx_t *ctx = NULL; ++ gf_cs_obj_state state = -1; ++ cs_private_t *priv = NULL; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(fd, err); ++ ++ priv = this->private; ++ ++ local = cs_local_init(this, frame, NULL, fd, GF_FOP_READ); ++ if (!local) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed"); ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ __cs_inode_ctx_get(this, fd->inode, &ctx); ++ ++ if (ctx) ++ state = __cs_get_file_state(this, fd->inode, ctx); ++ else ++ state = GF_CS_LOCAL; ++ ++ local->xattr_req = xdata ? 
dict_ref(xdata) : (xdata = dict_new()); ++ ++ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "dict_set failed key:" ++ " %s", ++ GF_CS_OBJECT_STATUS); ++ goto err; ++ } ++ ++ if (priv->remote_read) { ++ local->stub = fop_readv_stub(frame, cs_resume_remote_readv, fd, size, ++ offset, flags, xdata); ++ } else { ++ local->stub = fop_readv_stub(frame, cs_resume_readv, fd, size, offset, ++ flags, xdata); ++ } ++ if (!local->stub) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ if (state == GF_CS_LOCAL) { ++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, ++ xdata); ++ } else { ++ local->call_cnt++; ++ ret = locate_and_execute(frame); ++ if (ret) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ CS_STACK_UNWIND(readv, frame, -1, op_errno, NULL, -1, NULL, NULL, NULL); ++ ++ return 0; ++} ++ ++int ++cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame, ++ inode_t *inode, off_t offset, size_t size, ++ uint32_t flags) ++{ ++ int ret = 0; ++ ++ ret = cs_serve_readv(frame, offset, size, flags); + + return ret; + } +@@ -1059,7 +1414,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + goto err; + } else { + ret = __cs_inode_ctx_update(this, inode, val); +- gf_msg_debug(this->name, 0, "status : %lu", val); ++ gf_msg_debug(this->name, 0, "status : %" PRIu64, val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); + local->op_ret = -1; +@@ -1087,6 +1442,10 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + gf_msg_debug(this->name, 0, "NULL filepath"); + } + ++ ret = cs_update_xattrs(frame, xdata); ++ if (ret) ++ goto err; ++ + local->op_ret = 0; + local->xattr_rsp = dict_ref(xdata); + memcpy(&local->stbuf, stbuf, sizeof(struct iatt)); +@@ -1121,6 +1480,8 @@ cs_do_stat_check(call_frame_t *main_frame) + goto err; + } + ++ cs_set_xattr_req(main_frame); ++ + if (local->fd) { + STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, local->fd, local->xattr_req); +@@ -1177,6 +1538,10 @@ cs_common_cbk(call_frame_t *frame) + NULL, NULL, NULL); + break; + ++ case GF_FOP_TRUNCATE: ++ CS_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, ++ NULL, NULL, NULL); ++ break; + default: + break; + } +@@ -1427,7 +1792,7 @@ __cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx) + if (ret) + *ctx = NULL; + else +- *ctx = (cs_inode_ctx_t *)ctxint; ++ *ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + return; + } +@@ -1452,7 +1817,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val) + + ctx->state = val; + +- ctxint = (uint64_t)ctx; ++ ctxint = (uint64_t)(uintptr_t)ctx; + + ret = __inode_ctx_set(inode, this, &ctxint); + if (ret) { +@@ -1460,7 +1825,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val) + goto out; + } + } else { +- ctx = (cs_inode_ctx_t *)ctxint; ++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + ctx->state = val; + } +@@ -1483,7 +1848,7 @@ cs_inode_ctx_reset(xlator_t *this, inode_t *inode) + return 0; + } + +- ctx = (cs_inode_ctx_t *)ctxint; ++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + GF_FREE(ctx); + return 0; +@@ -1532,6 +1897,57 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode) + out: + return ret; + } ++ ++int32_t 
++__cs_get_dict_str(char **str, dict_t *xattr, const char *name, int *errnum) ++{ ++ data_t *data = NULL; ++ int ret = -1; ++ ++ assert(str != NULL); ++ ++ data = dict_get(xattr, (char *)name); ++ if (!data) { ++ *errnum = ENODATA; ++ goto out; ++ } ++ ++ *str = GF_CALLOC(data->len + 1, sizeof(char), gf_common_mt_char); ++ if (!(*str)) { ++ *errnum = ENOMEM; ++ goto out; ++ } ++ ++ memcpy(*str, data->data, sizeof(char) * (data->len)); ++ return 0; ++ ++out: ++ return ret; ++} ++ ++int32_t ++__cs_get_dict_uuid(uuid_t uuid, dict_t *xattr, const char *name, int *errnum) ++{ ++ data_t *data = NULL; ++ int ret = -1; ++ ++ assert(uuid != NULL); ++ ++ data = dict_get(xattr, (char *)name); ++ if (!data) { ++ *errnum = ENODATA; ++ goto out; ++ } ++ ++ assert(data->len == sizeof(uuid_t)); ++ ++ gf_uuid_copy(uuid, (unsigned char *)data->data); ++ return 0; ++ ++out: ++ return ret; ++} ++ + int32_t + cs_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict) + { +@@ -1606,7 +2022,6 @@ cs_notify(xlator_t *this, int event, void *data, ...) + + struct xlator_fops cs_fops = { + .stat = cs_stat, +- .readdirp = cs_readdirp, + .truncate = cs_truncate, + .seek = cs_seek, + .statfs = cs_statfs, +@@ -1627,7 +2042,9 @@ struct xlator_fops cs_fops = { + .zerofill = cs_zerofill, + }; + +-struct xlator_cbks cs_cbks = {}; ++struct xlator_cbks cs_cbks = { ++ .forget = cs_forget, ++}; + + struct xlator_dumpops cs_dumpops = { + .fdctx_to_dict = cs_fdctx_to_dict, +@@ -1647,6 +2064,15 @@ struct volume_options cs_options[] = { + {.key = {"cloudsync-storetype"}, + .type = GF_OPTION_TYPE_STR, + .description = "Defines which remote store is enabled"}, ++ {.key = {"cloudsync-remote-read"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .description = "Defines a remote read fop when on"}, ++ {.key = {"cloudsync-store-id"}, ++ .type = GF_OPTION_TYPE_STR, ++ .description = "Defines a volume wide store id"}, ++ {.key = {"cloudsync-product-id"}, ++ .type = GF_OPTION_TYPE_STR, ++ .description = "Defines a volume wide product id"}, + {.key = {NULL}}, + }; + +diff --git a/xlators/features/cloudsync/src/cloudsync.h b/xlators/features/cloudsync/src/cloudsync.h +index dbdb207..0cb800a 100644 +--- a/xlators/features/cloudsync/src/cloudsync.h ++++ b/xlators/features/cloudsync/src/cloudsync.h +@@ -19,6 +19,7 @@ + #include "cloudsync-common.h" + #include "cloudsync-autogen-fops.h" + ++#define ALIGN_SIZE 4096 + #define CS_LOCK_DOMAIN "cs.protect.file.stat" + typedef struct cs_dlstore { + off_t off; +@@ -29,6 +30,7 @@ typedef struct cs_dlstore { + } cs_dlstore; + + typedef struct cs_inode_ctx { ++ cs_loc_xattr_t locxattr; + gf_cs_obj_state state; + } cs_inode_ctx_t; + +@@ -100,4 +102,22 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t + cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xattr_req); ++ ++int32_t ++cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iovec *vector, int32_t count, ++ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata); ++int32_t ++cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata); ++int32_t ++cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata); ++ ++int ++cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame, ++ inode_t *inode, off_t offset, size_t size, ++ uint32_t flags); ++int ++cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, 
uint32_t flags); + #endif /* __CLOUDSYNC_H__ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 4b32fb6..73abf37 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3693,7 +3693,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_5_0, + .description = "enable/disable noatime option with ctime enabled.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, +- {.key = "feature.cloudsync-storetype", ++ {.key = "features.cloudsync-storetype", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, +@@ -3721,4 +3721,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .validate_fn = validate_boolean, + .description = "option to enforce mandatory lock on a file", + .flags = VOLOPT_FLAG_XLATOR_OPT}, ++ {.key = "features.cloudsync-remote-read", ++ .voltype = "features/cloudsync", ++ .value = "off", ++ .op_version = GD_OP_VERSION_6_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = NULL}}; +-- +1.8.3.1 + diff --git a/SOURCES/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch b/SOURCES/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch new file mode 100644 index 0000000..6068678 --- /dev/null +++ b/SOURCES/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch @@ -0,0 +1,1394 @@ +From b402b89f71a3ebabca24c459f106af1f9610939a Mon Sep 17 00:00:00 2001 +From: Anuradha Talur +Date: Fri, 30 Nov 2018 11:23:07 -0800 +Subject: [PATCH 154/169] cloudsync/cvlt: Cloudsync plugin for commvault store + +backport of: https://review.gluster.org/#/c/glusterfs/+/21771/ + +> Change-Id: Icbe53e78e9c4f6699c7a26a806ef4b14b39f5019 +> updates: bz#1642168 +> Signed-off-by: Anuradha Talur + +Change-Id: Ib543605daa51fa1cfe77ed475390a30ef14e6452 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/172194 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + configure.ac | 13 + + glusterfs.spec.in | 1 + + libglusterfs/src/glusterfs/glfs-message-id.h | 1 + + .../src/cloudsync-plugins/src/Makefile.am | 6 +- + .../src/cloudsync-plugins/src/cvlt/Makefile.am | 3 + + .../src/cloudsync-plugins/src/cvlt/src/Makefile.am | 12 + + .../cloudsync-plugins/src/cvlt/src/archivestore.h | 203 +++++ + .../cloudsync-plugins/src/cvlt/src/cvlt-messages.h | 30 + + .../src/cvlt/src/libcloudsynccvlt.sym | 1 + + .../src/cvlt/src/libcvlt-mem-types.h | 19 + + .../src/cloudsync-plugins/src/cvlt/src/libcvlt.c | 842 +++++++++++++++++++++ + .../src/cloudsync-plugins/src/cvlt/src/libcvlt.h | 84 ++ + xlators/features/cloudsync/src/cloudsync.c | 6 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 10 +- + 14 files changed, 1228 insertions(+), 3 deletions(-) + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c + create mode 100644 
xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h + +diff --git a/configure.ac b/configure.ac +index 0e11d4c..f597b86 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -170,6 +170,8 @@ AC_CONFIG_FILES([Makefile + xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile + xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile + xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile ++ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile ++ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile + xlators/playground/Makefile + xlators/playground/template/Makefile + xlators/playground/template/src/Makefile +@@ -937,6 +939,17 @@ AM_CONDITIONAL([BUILD_AMAZONS3_PLUGIN], [test "x$HAVE_AMAZONS3" = "xyes"]) + if test "x$HAVE_AMAZONS3" = "xyes";then + BUILD_CLOUDSYNC="yes" + fi ++BUILD_CVLT_PLUGIN="no" ++case $host_os in ++#enable cvlt plugin only for linux platforms ++ linux*) ++ BUILD_CVLT_PLUGIN="yes" ++ BUILD_CLOUDSYNC="yes" ++ ;; ++ *) ++ ;; ++esac ++AM_CONDITIONAL([BUILD_CVLT_PLUGIN], [test "x$BUILD_CVLT_PLUGIN" = "xyes"]) + AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"]) + dnl end cloudsync section + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ed58356..85e75f2 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1199,6 +1199,7 @@ exit 0 + %files cloudsync-plugins + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins + %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsyncs3.so ++ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsynccvlt.so + + %files devel + %dir %{_includedir}/glusterfs +diff --git a/libglusterfs/src/glusterfs/glfs-message-id.h b/libglusterfs/src/glusterfs/glfs-message-id.h +index 001f4ab..a1a16ca 100644 +--- a/libglusterfs/src/glusterfs/glfs-message-id.h ++++ b/libglusterfs/src/glusterfs/glfs-message-id.h +@@ -93,6 +93,7 @@ enum _msgid_comp { + GLFS_MSGID_COMP(TEMPLATE, 1), + GLFS_MSGID_COMP(UTIME, 1), + GLFS_MSGID_COMP(SNAPVIEW_SERVER, 1), ++ GLFS_MSGID_COMP(CVLT, 1), + /* --- new segments for messages goes above this line --- */ + + GLFS_MSGID_END +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am +index 4deefb6..fb6b058 100644 +--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am +@@ -2,6 +2,10 @@ if BUILD_AMAZONS3_PLUGIN + AMAZONS3_DIR = cloudsyncs3 + endif + +-SUBDIRS = ${AMAZONS3_DIR} ++if BUILD_CVLT_PLUGIN ++ CVLT_DIR = cvlt ++endif ++ ++SUBDIRS = ${AMAZONS3_DIR} ${CVLT_DIR} + + CLEANFILES = +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am +new file mode 100644 +index 0000000..a985f42 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am +@@ -0,0 +1,3 @@ ++SUBDIRS = src ++ ++CLEANFILES = +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am +new file mode 100644 +index 0000000..b512464 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am +@@ -0,0 +1,12 @@ ++csp_LTLIBRARIES = cloudsynccvlt.la ++cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins ++ ++cloudsynccvlt_la_SOURCES = 
libcvlt.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c ++cloudsynccvlt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la ++cloudsynccvlt_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym ++AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src ++noinst_HEADERS = archivestore.h libcvlt.h libcvlt-mem-types.h cvlt-messages.h ++AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -I$(top_srcdir)/xlators/features/cloudsync/src ++CLEANFILES = ++ ++EXTRA_DIST = libcloudsynccvlt.sym +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h +new file mode 100644 +index 0000000..7230ef7 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h +@@ -0,0 +1,203 @@ ++/* ++ Copyright (c) 2018 Commvault Systems, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __ARCHIVESTORE_H__ ++#define __ARCHIVESTORE_H__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid" ++#define CS_XATTR_PRODUCT_ID "trusted.cloudsync.product-id" ++#define CS_XATTR_STORE_ID "trusted.cloudsync.store-id" ++ ++struct _archstore_methods; ++typedef struct _archstore_methods archstore_methods_t; ++ ++struct _archstore_desc { ++ void *priv; /* Private field for store mgmt. */ ++ /* To be used only by archive store*/ ++}; ++typedef struct _archstore_desc archstore_desc_t; ++ ++struct _archstore_info { ++ char *id; /* Identifier for the archivestore */ ++ uint32_t idlen; /* Length of identifier string */ ++ char *prod; /* Name of the data mgmt. product */ ++ uint32_t prodlen; /* Length of the product string */ ++}; ++typedef struct _archstore_info archstore_info_t; ++ ++struct _archstore_fileinfo { ++ uuid_t uuid; /* uuid of the file */ ++ char *path; /* file path */ ++ uint32_t pathlength; /* length of file path */ ++}; ++typedef struct _archstore_fileinfo archstore_fileinfo_t; ++ ++struct _app_callback_info { ++ archstore_info_t *src_archstore; ++ archstore_fileinfo_t *src_archfile; ++ archstore_info_t *dest_archstore; ++ archstore_fileinfo_t *dest_archfile; ++}; ++typedef struct _app_callback_info app_callback_info_t; ++ ++typedef void (*app_callback_t)(archstore_desc_t *, app_callback_info_t *, ++ void *, int64_t, int32_t); ++ ++enum _archstore_scan_type { FULL = 1, INCREMENTAL = 2 }; ++typedef enum _archstore_scan_type archstore_scan_type_t; ++ ++typedef int32_t archstore_errno_t; ++ ++/* ++ * Initialize archive store. ++ * arg1 pointer to structure containing archive store information ++ * arg2 error number if any generated during the initialization ++ * arg3 name of the log file ++ */ ++typedef int32_t (*init_archstore_t)(archstore_desc_t *, archstore_errno_t *, ++ const char *); ++ ++/* ++ * Clean up archive store. 
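++ * Expected to be invoked once per store descriptor at teardown (in
++ * this series, from the plugin's cvlt_term_xlator()).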
++ * arg1 pointer to structure containing archive store information ++ * arg2 error number if any generated during the cleanup ++ */ ++typedef int32_t (*term_archstore_t)(archstore_desc_t *, archstore_errno_t *); ++ ++/* ++ * Read the contents of the file from archive store ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing archive store information ++ * arg3 pointer to structure containing information about file to be read ++ * arg4 offset in the file from which data should be read ++ * arg5 buffer where the data should be read ++ * arg6 number of bytes of data to be read ++ * arg7 error number if any generated during the read from file ++ * arg8 callback handler to be invoked after the data is read ++ * arg9 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*read_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, off_t, char *, ++ size_t, archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Restore the contents of the file from archive store ++ * This is basically in-place restore ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing archive store information ++ * arg3 pointer to structure containing information about file to be restored ++ * arg4 error number if any generated during the file restore ++ * arg5 callback to be invoked after the file is restored ++ * arg6 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*recall_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Restore the contents of the file from archive store to a different store ++ * This is basically out-of-place restore ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing source archive store information ++ * arg3 pointer to structure containing information about file to be restored ++ * arg4 pointer to structure containing destination archive store information ++ * arg5 pointer to structure containing information about the location to ++ which the file will be restored ++ * arg6 error number if any generated during the file restore ++ * arg7 callback to be invoked after the file is restored ++ * arg8 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*restore_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Archive the contents of the file to archive store ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing source archive store information ++ * arg3 pointer to structure containing information about files to be archived ++ * arg4 pointer to structure containing destination archive store information ++ * arg5 pointer to structure containing information about files that failed ++ * to be archived ++ * arg6 error number if any generated during the file archival ++ * arg7 callback to be invoked after the file is archived ++ * arg8 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*archive_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Backup list of files provided in the input file ++ * arg1 pointer 
to structure containing archive store description ++ * arg2 pointer to structure containing source archive store information ++ * arg3 pointer to structure containing information about files to be backed up ++ * arg4 pointer to structure containing destination archive store information ++ * arg5 pointer to structure containing information about files that failed ++ * to be backed up ++ * arg6 error number if any generated during the file archival ++ * arg7 callback to be invoked after the file is archived ++ * arg8 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*backup_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Scan the contents of a store and determine the files which need to be ++ * backed up. ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing archive store information ++ * arg3 type of scan whether full or incremental ++ * arg4 path to file that contains list of files to be backed up ++ * arg5 error number if any generated during scan operation ++ */ ++typedef int32_t (*scan_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_scan_type_t, char *, ++ archstore_errno_t *); ++ ++struct _archstore_methods { ++ init_archstore_t init; ++ term_archstore_t fini; ++ backup_archstore_t backup; ++ archive_archstore_t archive; ++ scan_archstore_t scan; ++ restore_archstore_t restore; ++ recall_archstore_t recall; ++ read_archstore_t read; ++}; ++ ++typedef int (*get_archstore_methods_t)(archstore_methods_t *); ++ ++/* ++ * Single function that will be invoked by applications for extracting ++ * the function pointers to all data management functions. ++ */ ++int32_t ++get_archstore_methods(archstore_methods_t *); ++ ++#endif /* End of __ARCHIVESTORE_H__ */ +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h +new file mode 100644 +index 0000000..57c9aa7 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h +@@ -0,0 +1,30 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++ */ ++ ++#ifndef _CVLT_MESSAGES_H_ ++#define _CVLT_MESSAGES_H_ ++ ++#include ++ ++/* To add new message IDs, append new identifiers at the end of the list. ++ * ++ * Never remove a message ID. If it's not used anymore, you can rename it or ++ * leave it as it is, but not delete it. This is to prevent reutilization of ++ * IDs by other messages. ++ * ++ * The component name must match one of the entries defined in ++ * glfs-message-id.h. 
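++ *
++ * (The CVLT component itself is registered via GLFS_MSGID_COMP() in
++ * glfs-message-id.h earlier in this patch.)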
++ */ ++ ++GLFS_MSGID(CVLT, CVLT_EXTRACTION_FAILED, CVLT_FREE, ++ CVLT_RESOURCE_ALLOCATION_FAILED, CVLT_RESTORE_FAILED, ++ CVLT_READ_FAILED, CVLT_NO_MEMORY, CVLT_DLOPEN_FAILED); ++ ++#endif /* !_CVLT_MESSAGES_H_ */ +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym +new file mode 100644 +index 0000000..0bc2736 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym +@@ -0,0 +1 @@ ++store_ops +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h +new file mode 100644 +index 0000000..c24fab8 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h +@@ -0,0 +1,19 @@ ++/* ++ * Copyright (c) 2018 Commvault Systems, Inc. ++ * This file is part of GlusterFS. ++ * ++ * This file is licensed to you under your choice of the GNU Lesser ++ * General Public License, version 3 or any later version (LGPLv3 or ++ * later), or the GNU General Public License, version 2 (GPLv2), in all ++ * cases as published by the Free Software Foundation. ++ */ ++ ++#ifndef __LIBCVLT_MEM_TYPES_H__ ++#define __LIBCVLT_MEM_TYPES_H__ ++ ++#include ++enum libcvlt_mem_types_ { ++ gf_libcvlt_mt_cvlt_private_t = gf_common_mt_end + 1, ++ gf_libcvlt_mt_end ++}; ++#endif /* __LIBCVLT_MEM_TYPES_H__ */ +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c +new file mode 100644 +index 0000000..e827882 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c +@@ -0,0 +1,842 @@ ++#include ++#include ++#include ++#include "libcvlt.h" ++#include "cloudsync-common.h" ++#include "cvlt-messages.h" ++ ++#define LIBARCHIVE_SO "libopenarchive.so" ++#define ALIGN_SIZE 4096 ++#define CVLT_TRAILER "cvltv1" ++ ++store_methods_t store_ops = { ++ .fop_download = cvlt_download, ++ .fop_init = cvlt_init, ++ .fop_reconfigure = cvlt_reconfigure, ++ .fop_fini = cvlt_fini, ++ .fop_remote_read = cvlt_read, ++}; ++ ++static const int32_t num_req = 32; ++static const int32_t num_iatt = 32; ++static char *plugin = "cvlt_cloudSync"; ++ ++int32_t ++mem_acct_init(xlator_t *this) ++{ ++ int ret = -1; ++ ++ if (!this) ++ return ret; ++ ++ ret = xlator_mem_acct_init(this, gf_libcvlt_mt_end + 1); ++ ++ if (ret != 0) { ++ return ret; ++ } ++ ++ return ret; ++} ++ ++static void ++cvlt_free_resources(archive_t *arch) ++{ ++ /* ++ * We will release all the resources that were allocated by the xlator. ++ * Check whether there are any buffers which have not been released ++ * back to a mempool. ++ */ ++ ++ if (arch->handle) { ++ dlclose(arch->handle); ++ } ++ ++ if (arch->iobuf_pool) { ++ iobuf_pool_destroy(arch->iobuf_pool); ++ } ++ ++ if (arch->req_pool) { ++ mem_pool_destroy(arch->req_pool); ++ arch->req_pool = NULL; ++ } ++ ++ return; ++} ++ ++static int32_t ++cvlt_extract_store_fops(xlator_t *this, archive_t *arch) ++{ ++ int32_t op_ret = -1; ++ get_archstore_methods_t get_archstore_methods; ++ ++ /* ++ * libopenarchive.so defines methods for performing data management ++ * operations. We will extract the methods from library and these ++ * methods will be invoked for moving data between glusterfs volume ++ * and the data management product. 
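++ *
++ * The library is expected to expose a single entry point,
++ * get_archstore_methods(), which fills in the archstore_methods_t
++ * table (init/fini/read/restore/...) declared in archivestore.h.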
++ */ ++ ++ VALIDATE_OR_GOTO(arch, err); ++ ++ arch->handle = dlopen(LIBARCHIVE_SO, RTLD_NOW); ++ if (!arch->handle) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_DLOPEN_FAILED, ++ " failed to open %s ", LIBARCHIVE_SO); ++ return op_ret; ++ } ++ ++ dlerror(); /* Clear any existing error */ ++ ++ get_archstore_methods = dlsym(arch->handle, "get_archstore_methods"); ++ if (!get_archstore_methods) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " Error extracting get_archstore_methods()"); ++ dlclose(arch->handle); ++ arch->handle = NULL; ++ return op_ret; ++ } ++ ++ op_ret = get_archstore_methods(&(arch->fops)); ++ if (op_ret) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " Failed to extract methods in get_archstore_methods"); ++ dlclose(arch->handle); ++ arch->handle = NULL; ++ return op_ret; ++ } ++ ++err: ++ return op_ret; ++} ++ ++static int32_t ++cvlt_alloc_resources(xlator_t *this, archive_t *arch, int num_req, int num_iatt) ++{ ++ /* ++ * Initialize information about all the memory pools that will be ++ * used by this xlator. ++ */ ++ arch->nreqs = 0; ++ ++ arch->req_pool = NULL; ++ ++ arch->handle = NULL; ++ arch->xl = this; ++ ++ arch->req_pool = mem_pool_new(cvlt_request_t, num_req); ++ if (!arch->req_pool) { ++ goto err; ++ } ++ ++ arch->iobuf_pool = iobuf_pool_new(); ++ if (!arch->iobuf_pool) { ++ goto err; ++ } ++ ++ if (cvlt_extract_store_fops(this, arch)) { ++ goto err; ++ } ++ ++ return 0; ++ ++err: ++ ++ return -1; ++} ++ ++static void ++cvlt_req_init(cvlt_request_t *req) ++{ ++ sem_init(&(req->sem), 0, 0); ++ ++ return; ++} ++ ++static void ++cvlt_req_destroy(cvlt_request_t *req) ++{ ++ if (req->iobuf) { ++ iobuf_unref(req->iobuf); ++ } ++ ++ if (req->iobref) { ++ iobref_unref(req->iobref); ++ } ++ ++ sem_destroy(&(req->sem)); ++ ++ return; ++} ++ ++static cvlt_request_t * ++cvlt_alloc_req(archive_t *arch) ++{ ++ cvlt_request_t *reqptr = NULL; ++ ++ if (!arch) { ++ goto err; ++ } ++ ++ if (arch->req_pool) { ++ reqptr = mem_get0(arch->req_pool); ++ if (reqptr) { ++ cvlt_req_init(reqptr); ++ } ++ } ++ ++ if (reqptr) { ++ LOCK(&(arch->lock)); ++ arch->nreqs++; ++ UNLOCK(&(arch->lock)); ++ } ++ ++err: ++ return reqptr; ++} ++ ++static int32_t ++cvlt_free_req(archive_t *arch, cvlt_request_t *reqptr) ++{ ++ if (!reqptr) { ++ goto err; ++ } ++ ++ if (!arch) { ++ goto err; ++ } ++ ++ if (arch->req_pool) { ++ /* ++ * Free the request resources if they exist. ++ */ ++ ++ cvlt_req_destroy(reqptr); ++ mem_put(reqptr); ++ ++ LOCK(&(arch->lock)); ++ arch->nreqs--; ++ UNLOCK(&(arch->lock)); ++ } ++ ++ return 0; ++ ++err: ++ return -1; ++} ++ ++static int32_t ++cvlt_init_xlator(xlator_t *this, archive_t *arch, int num_req, int num_iatt) ++{ ++ int32_t ret = -1; ++ int32_t errnum = -1; ++ int32_t locked = 0; ++ ++ /* ++ * Perform all the initializations needed for brining up the xlator. 
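++ * The request/iobuf pools and the dlopen()ed method table are set
++ * up first (cvlt_alloc_resources()), since the store's own init()
++ * invoked below relies on the extracted fops.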
++ */ ++ if (!arch) { ++ goto err; ++ } ++ ++ LOCK_INIT(&(arch->lock)); ++ LOCK(&(arch->lock)); ++ ++ locked = 1; ++ ++ ret = cvlt_alloc_resources(this, arch, num_req, num_iatt); ++ ++ if (ret) { ++ goto err; ++ } ++ ++ /* ++ * Now that the fops have been extracted initialize the store ++ */ ++ ret = arch->fops.init(&(arch->descinfo), &errnum, plugin); ++ if (ret) { ++ goto err; ++ } ++ ++ UNLOCK(&(arch->lock)); ++ locked = 0; ++ ret = 0; ++ ++ return ret; ++ ++err: ++ cvlt_free_resources(arch); ++ ++ if (locked) { ++ UNLOCK(&(arch->lock)); ++ } ++ ++ return ret; ++} ++ ++static int32_t ++cvlt_term_xlator(archive_t *arch) ++{ ++ int32_t errnum = -1; ++ ++ if (!arch) { ++ goto err; ++ } ++ ++ LOCK(&(arch->lock)); ++ ++ /* ++ * Release the resources that have been allocated inside store ++ */ ++ arch->fops.fini(&(arch->descinfo), &errnum); ++ ++ cvlt_free_resources(arch); ++ ++ UNLOCK(&(arch->lock)); ++ ++ GF_FREE(arch); ++ ++ return 0; ++ ++err: ++ return -1; ++} ++ ++static int32_t ++cvlt_init_store_info(archive_t *priv, archstore_info_t *store_info) ++{ ++ if (!store_info) { ++ return -1; ++ } ++ ++ store_info->prod = priv->product_id; ++ store_info->prodlen = strlen(priv->product_id); ++ ++ store_info->id = priv->store_id; ++ store_info->idlen = strlen(priv->store_id); ++ ++ return 0; ++} ++ ++static int32_t ++cvlt_init_file_info(cs_loc_xattr_t *xattr, archstore_fileinfo_t *file_info) ++{ ++ if (!xattr || !file_info) { ++ return -1; ++ } ++ ++ gf_uuid_copy(file_info->uuid, xattr->uuid); ++ file_info->path = xattr->file_path; ++ file_info->pathlength = strlen(xattr->file_path); ++ ++ return 0; ++} ++ ++static int32_t ++cvlt_init_gluster_store_info(cs_loc_xattr_t *xattr, ++ archstore_info_t *store_info) ++{ ++ static char *product = "glusterfs"; ++ ++ if (!xattr || !store_info) { ++ return -1; ++ } ++ ++ store_info->prod = product; ++ store_info->prodlen = strlen(product); ++ ++ store_info->id = xattr->volname; ++ store_info->idlen = strlen(xattr->volname); ++ ++ return 0; ++} ++ ++static int32_t ++cvlt_init_gluster_file_info(cs_loc_xattr_t *xattr, ++ archstore_fileinfo_t *file_info) ++{ ++ if (!xattr || !file_info) { ++ return -1; ++ } ++ ++ gf_uuid_copy(file_info->uuid, xattr->gfid); ++ file_info->path = xattr->file_path; ++ file_info->pathlength = strlen(xattr->file_path); ++ ++ return 0; ++} ++ ++static void ++cvlt_copy_stat_info(struct iatt *buf, cs_size_xattr_t *xattrs) ++{ ++ /* ++ * If the file was archived then the reported size will not be a ++ * correct one. We need to fix this. 
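++ * The stub on the brick was truncated when the file was archived,
++ * so the size/blksize/blocks recorded at archive time (passed in
++ * here as the xattrs argument, filled from req->szxattr by the
++ * caller) are patched into the iatt returned to the application.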
++ */ ++ if (buf && xattrs) { ++ buf->ia_size = xattrs->size; ++ buf->ia_blksize = xattrs->blksize; ++ buf->ia_blocks = xattrs->blocks; ++ } ++ ++ return; ++} ++ ++static void ++cvlt_readv_complete(archstore_desc_t *desc, app_callback_info_t *cbkinfo, ++ void *cookie, int64_t op_ret, int32_t op_errno) ++{ ++ struct iovec iov; ++ xlator_t *this = NULL; ++ struct iatt postbuf = { ++ 0, ++ }; ++ call_frame_t *frame = NULL; ++ cvlt_request_t *req = (cvlt_request_t *)cookie; ++ cs_local_t *local = NULL; ++ cs_private_t *cspriv = NULL; ++ archive_t *priv = NULL; ++ ++ frame = req->frame; ++ this = frame->this; ++ local = frame->local; ++ ++ cspriv = this->private; ++ priv = (archive_t *)cspriv->stores->config; ++ ++ if (strcmp(priv->trailer, CVLT_TRAILER)) { ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ gf_msg_debug(plugin, 0, ++ " Read callback invoked offset:%" PRIu64 "bytes: %" PRIu64 ++ " op : %d ret : %" PRId64 " errno : %d", ++ req->offset, req->bytes, req->op_type, op_ret, op_errno); ++ ++ if (op_ret < 0) { ++ goto out; ++ } ++ ++ req->iobref = iobref_new(); ++ if (!req->iobref) { ++ op_ret = -1; ++ op_errno = ENOMEM; ++ goto out; ++ } ++ ++ iobref_add(req->iobref, req->iobuf); ++ iov.iov_base = iobuf_ptr(req->iobuf); ++ iov.iov_len = op_ret; ++ ++ cvlt_copy_stat_info(&postbuf, &(req->szxattr)); ++ ++ /* ++ * Hack to notify higher layers of EOF. ++ */ ++ if (!postbuf.ia_size || (req->offset + iov.iov_len >= postbuf.ia_size)) { ++ gf_msg_debug(plugin, 0, " signalling end-of-file for uuid=%s", ++ uuid_utoa(req->file_info.uuid)); ++ op_errno = ENOENT; ++ } ++ ++out: ++ ++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf, ++ req->iobref, local->xattr_rsp); ++ ++ if (req) { ++ cvlt_free_req(priv, req); ++ } ++ ++ return; ++} ++ ++static void ++cvlt_download_complete(archstore_desc_t *store, app_callback_info_t *cbk_info, ++ void *cookie, int64_t ret, int errcode) ++{ ++ cvlt_request_t *req = (cvlt_request_t *)cookie; ++ ++ gf_msg_debug(plugin, 0, ++ " Download callback invoked ret : %" PRId64 " errno : %d", ++ ret, errcode); ++ ++ req->op_ret = ret; ++ req->op_errno = errcode; ++ sem_post(&(req->sem)); ++ ++ return; ++} ++ ++void * ++cvlt_init(xlator_t *this) ++{ ++ int ret = 0; ++ archive_t *priv = NULL; ++ ++ if (!this->children || this->children->next) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, ++ "should have exactly one child"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (!this->parents) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, ++ "dangling volume. 
check volfile"); ++ ret = -1; ++ goto out; ++ } ++ ++ priv = GF_CALLOC(1, sizeof(archive_t), gf_libcvlt_mt_cvlt_private_t); ++ if (!priv) { ++ ret = -1; ++ goto out; ++ } ++ ++ priv->trailer = CVLT_TRAILER; ++ if (cvlt_init_xlator(this, priv, num_req, num_iatt)) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, "xlator init failed"); ++ ret = -1; ++ goto out; ++ } ++ ++ GF_OPTION_INIT("cloudsync-store-id", priv->store_id, str, out); ++ GF_OPTION_INIT("cloudsync-product-id", priv->product_id, str, out); ++ ++ gf_msg(plugin, GF_LOG_INFO, 0, 0, ++ "store id is : %s " ++ "product id is : %s.", ++ priv->store_id, priv->product_id); ++out: ++ if (ret == -1) { ++ cvlt_term_xlator(priv); ++ return (NULL); ++ } ++ return priv; ++} ++ ++int ++cvlt_reconfigure(xlator_t *this, dict_t *options) ++{ ++ cs_private_t *cspriv = NULL; ++ archive_t *priv = NULL; ++ ++ cspriv = this->private; ++ priv = (archive_t *)cspriv->stores->config; ++ ++ if (strcmp(priv->trailer, CVLT_TRAILER)) ++ goto out; ++ ++ GF_OPTION_RECONF("cloudsync-store-id", priv->store_id, options, str, out); ++ ++ GF_OPTION_RECONF("cloudsync-product-id", priv->product_id, options, str, ++ out); ++ gf_msg_debug(plugin, 0, ++ "store id is : %s " ++ "product id is : %s.", ++ priv->store_id, priv->product_id); ++ return 0; ++out: ++ return -1; ++} ++ ++void ++cvlt_fini(void *config) ++{ ++ archive_t *priv = NULL; ++ ++ priv = (archive_t *)config; ++ ++ if (strcmp(priv->trailer, CVLT_TRAILER)) ++ return; ++ ++ cvlt_term_xlator(priv); ++ gf_msg(plugin, GF_LOG_INFO, 0, CVLT_FREE, " released xlator resources"); ++ return; ++} ++ ++int ++cvlt_download(call_frame_t *frame, void *config) ++{ ++ archive_t *parch = NULL; ++ cs_local_t *local = frame->local; ++ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr; ++ cvlt_request_t *req = NULL; ++ archstore_info_t dest_storeinfo; ++ archstore_fileinfo_t dest_fileinfo; ++ int32_t op_ret, op_errno; ++ ++ parch = (archive_t *)config; ++ ++ if (strcmp(parch->trailer, CVLT_TRAILER)) { ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ gf_msg_debug(plugin, 0, " download invoked for uuid = %s gfid=%s ", ++ locxattr->uuid, uuid_utoa(locxattr->gfid)); ++ ++ if (!(parch->fops.restore)) { ++ op_errno = ELIBBAD; ++ goto err; ++ } ++ ++ /* ++ * Download needs to be processed. Allocate a request. ++ */ ++ req = cvlt_alloc_req(parch); ++ ++ if (!req) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_RESOURCE_ALLOCATION_FAILED, ++ " failed to allocated request for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ /* ++ * Initialize the request object. ++ */ ++ req->op_type = CVLT_RESTORE_OP; ++ req->frame = frame; ++ ++ /* ++ * The file is currently residing inside a data management store. ++ * To restore the file contents we need to provide the information ++ * about data management store. ++ */ ++ op_ret = cvlt_init_store_info(parch, &(req->store_info)); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract store info for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ op_ret = cvlt_init_file_info(locxattr, &(req->file_info)); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract file info for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ /* ++ * We need t perform in-place restore of the file from data managment ++ * store to gusterfs volume. 
++ */ ++ op_ret = cvlt_init_gluster_store_info(locxattr, &dest_storeinfo); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract destination store info for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ op_ret = cvlt_init_gluster_file_info(locxattr, &dest_fileinfo); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract file info for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ /* ++ * Submit the restore request. ++ */ ++ op_ret = parch->fops.restore(&(parch->descinfo), &(req->store_info), ++ &(req->file_info), &dest_storeinfo, ++ &dest_fileinfo, &op_errno, ++ cvlt_download_complete, req); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED, ++ " failed to restore file gfid=%s from data managment store", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ /* ++ * Wait for the restore to complete. ++ */ ++ sem_wait(&(req->sem)); ++ ++ if (req->op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED, ++ " restored failed for gfid=%s", uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ if (req) { ++ cvlt_free_req(parch, req); ++ } ++ ++ return 0; ++ ++err: ++ ++ if (req) { ++ cvlt_free_req(parch, req); ++ } ++ ++ return -1; ++} ++ ++int ++cvlt_read(call_frame_t *frame, void *config) ++{ ++ int32_t op_ret = -1; ++ int32_t op_errno = 0; ++ archive_t *parch = NULL; ++ cvlt_request_t *req = NULL; ++ struct iovec iov = { ++ 0, ++ }; ++ struct iobref *iobref; ++ size_t size = 0; ++ off_t off = 0; ++ ++ cs_local_t *local = frame->local; ++ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr; ++ ++ size = local->xattrinfo.size; ++ off = local->xattrinfo.offset; ++ ++ parch = (archive_t *)config; ++ ++ if (strcmp(parch->trailer, CVLT_TRAILER)) { ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ gf_msg_debug(plugin, 0, ++ " read invoked for gfid = %s offset = %" PRIu64 ++ " file_size = %" PRIu64, ++ uuid_utoa(locxattr->gfid), off, local->stbuf.ia_size); ++ ++ if (off >= local->stbuf.ia_size) { ++ /* ++ * Hack to notify higher layers of EOF. ++ */ ++ ++ op_errno = ENOENT; ++ op_ret = 0; ++ ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED, ++ " reporting end-of-file for gfid=%s", uuid_utoa(locxattr->gfid)); ++ ++ goto err; ++ } ++ ++ if (!size) { ++ op_errno = EINVAL; ++ ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED, ++ " zero size read attempted on gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ if (!(parch->fops.read)) { ++ op_errno = ELIBBAD; ++ goto err; ++ } ++ ++ /* ++ * The read request need to be processed. Allocate a request. ++ */ ++ req = cvlt_alloc_req(parch); ++ ++ if (!req) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_NO_MEMORY, ++ " failed to allocated request for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ req->iobuf = iobuf_get_page_aligned(parch->iobuf_pool, size, ALIGN_SIZE); ++ if (!req->iobuf) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ /* ++ * Initialize the request object. ++ */ ++ req->op_type = CVLT_READ_OP; ++ req->offset = off; ++ req->bytes = size; ++ req->frame = frame; ++ req->szxattr.size = local->stbuf.ia_size; ++ req->szxattr.blocks = local->stbuf.ia_blocks; ++ req->szxattr.blksize = local->stbuf.ia_blksize; ++ ++ /* ++ * The file is currently residing inside a data management store. ++ * To read the file contents we need to provide the information ++ * about data management store. 
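++ *
++ * Unlike the download path above, the read is asynchronous:
++ * fops.read() only submits the request, and cvlt_readv_complete()
++ * unwinds the readv once the data has landed in req->iobuf.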
++ */ ++ op_ret = cvlt_init_store_info(parch, &(req->store_info)); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract store info for gfid=%s" ++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET ++ ", " ++ " buf=%p", ++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); ++ goto err; ++ } ++ ++ op_ret = cvlt_init_file_info(locxattr, &(req->file_info)); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract file info for gfid=%s" ++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET ++ ", " ++ " buf=%p", ++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); ++ goto err; ++ } ++ ++ /* ++ * Submit the read request. ++ */ ++ op_ret = parch->fops.read(&(parch->descinfo), &(req->store_info), ++ &(req->file_info), off, req->iobuf->ptr, size, ++ &op_errno, cvlt_readv_complete, req); ++ ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " read failed on gfid=%s" ++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET ++ ", " ++ " buf=%p", ++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); ++ goto err; ++ } ++ ++ return 0; ++ ++err: ++ ++ iobref = iobref_new(); ++ gf_msg_debug(plugin, 0, " read unwinding stack op_ret = %d, op_errno = %d", ++ op_ret, op_errno); ++ ++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, ++ &(local->stbuf), iobref, local->xattr_rsp); ++ ++ if (iobref) { ++ iobref_unref(iobref); ++ } ++ ++ if (req) { ++ cvlt_free_req(parch, req); ++ } ++ ++ return 0; ++} +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h +new file mode 100644 +index 0000000..c45ac94 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h +@@ -0,0 +1,84 @@ ++/* ++ Copyright (c) 2018 Commvault Systems, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/
++#ifndef _LIBCVLT_H
++#define _LIBCVLT_H
++
++#include <semaphore.h>
++#include <glusterfs/glusterfs.h>
++#include <glusterfs/call-stub.h>
++#include <glusterfs/xlator.h>
++#include <glusterfs/syncop.h>
++#include <glusterfs/compat-errno.h>
++#include <glusterfs/timer.h>
++#include "cloudsync-common.h"
++#include "libcvlt-mem-types.h"
++#include "archivestore.h"
++
++enum _cvlt_op {
++ CVLT_READ_OP = 1,
++ CVLT_WRITE_OP = 2,
++ CVLT_RESTORE_OP = 3,
++ CVLT_ARCHIVE_OP = 4,
++ CVLT_LOOKUP_OP = 5,
++ CVLT_XATTR_OP = 6,
++ CVLT_STAT_OP = 7,
++ CVLT_FSTAT_op = 8,
++ CVLT_UNDEF_OP = 127
++};
++typedef enum _cvlt_op cvlt_op_t;
++
++struct _archive;
++struct _cvlt_request {
++ uint64_t offset;
++ uint64_t bytes;
++ struct iobuf *iobuf;
++ struct iobref *iobref;
++ call_frame_t *frame;
++ cvlt_op_t op_type;
++ int32_t op_ret;
++ int32_t op_errno;
++ xlator_t *this;
++ sem_t sem;
++ archstore_info_t store_info;
++ archstore_fileinfo_t file_info;
++ cs_size_xattr_t szxattr;
++};
++typedef struct _cvlt_request cvlt_request_t;
++
++struct _archive {
++ gf_lock_t lock; /* lock for controlling access */
++ xlator_t *xl; /* xlator */
++ void *handle; /* handle returned from dlopen */
++ int32_t nreqs; /* num requests active */
++ struct mem_pool *req_pool; /* pool for requests */
++ struct iobuf_pool *iobuf_pool; /* iobuf pool */
++ archstore_desc_t descinfo; /* Archive store descriptor info */
++ archstore_methods_t fops; /* function pointers */
++ char *product_id;
++ char *store_id;
++ char *trailer;
++};
++typedef struct _archive archive_t;
++
++void *
++cvlt_init(xlator_t *);
++
++int
++cvlt_reconfigure(xlator_t *, dict_t *);
++
++void
++cvlt_fini(void *);
++
++int
++cvlt_download(call_frame_t *, void *);
++
++int
++cvlt_read(call_frame_t *, void *);
++
++#endif
+diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
+index 2240fc3..8026b05 100644
+--- a/xlators/features/cloudsync/src/cloudsync.c
++++ b/xlators/features/cloudsync/src/cloudsync.c
+@@ -39,7 +39,11 @@ struct cs_plugin plugins[] = {
+ {.name = "cloudsyncs3",
+ .library = "cloudsyncs3.so",
+ .description = "cloudsync s3 store."},
+-
++#if defined(__linux__)
++ {.name = "cvlt",
++ .library = "cloudsynccvlt.so",
++ .description = "Commvault content store."},
++#endif
+ {.name = NULL},
+ };
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 73abf37..7a83124 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3724,6 +3724,14 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "features.cloudsync-remote-read",
+ .voltype = "features/cloudsync",
+ .value = "off",
+- .op_version = GD_OP_VERSION_6_0,
++ .op_version = GD_OP_VERSION_7_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT},
++ {.key = "features.cloudsync-store-id",
++ .voltype = "features/cloudsync",
++ .op_version = GD_OP_VERSION_7_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT},
++ {.key = "features.cloudsync-product-id",
++ .voltype = "features/cloudsync",
++ .op_version = GD_OP_VERSION_7_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = NULL}};
+--
+1.8.3.1
+
diff --git a/SOURCES/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch b/SOURCES/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch
new file mode 100644
index 0000000..937a772
--- /dev/null
+++ b/SOURCES/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch
@@ -0,0 +1,114 @@
+From 693fcf327eace37fe698953b90050d67fc840ac6 Mon Sep 17 00:00:00 2001
+From: Anuradha Talur
+Date: Wed, 24 Apr 2019 12:06:23 -0700
+Subject: [PATCH 155/169] cloudsync: Make readdirp return stat info of all the
+ dirents
+
+This change was missed when the initial changes were sent.
+Should have been a part of:
+ https://review.gluster.org/#/c/glusterfs/+/21757/
+
+Gist of the change:
+ The function that fills in stat info for dirents is
+invoked in posix readdirp when cloudsync populates the xdata
+request with GF_CS_OBJECT_STATUS.
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/22616/
+
+> Change-Id: Ide0c4e80afb74cd2120f74ba934ed40123152d69
+> updates: bz#1642168
+> Signed-off-by: Anuradha Talur
+
+Change-Id: I77de3f9d8ae01a0280a9d1753f94d74b5e5ce2fd
+Signed-off-by: Susant Palai
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172193
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ xlators/features/cloudsync/src/cloudsync-fops-c.py | 2 +-
+ xlators/features/cloudsync/src/cloudsync.c | 35 ++++++++++++++++++++++
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 2 ++
+ 3 files changed, 38 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+index a7a2201..8878b70 100755
+--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py
++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+@@ -285,7 +285,7 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr',
+
+ # These fops need a separate implementation
+ special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr',
+- 'truncate', 'fstat', 'readv']
++ 'truncate', 'fstat', 'readv', 'readdirp']
+
+ def gen_defaults():
+ for name in ops:
+diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
+index 8026b05..26e512c 100644
+--- a/xlators/features/cloudsync/src/cloudsync.c
++++ b/xlators/features/cloudsync/src/cloudsync.c
+@@ -280,6 +280,40 @@ out:
+ }
+
+ int32_t
++cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t off, dict_t *xdata)
++{
++ int ret = 0;
++ int op_errno = ENOMEM;
++
++ if (!xdata) {
++ xdata = dict_new();
++ if (!xdata) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
++ "failed to create "
++ "dict");
++ goto err;
++ }
++ }
++
++ ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "dict_set failed key:"
++ " %s",
++ GF_CS_OBJECT_STATUS);
++ goto err;
++ }
++
++ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
++ return 0;
++err:
++ STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL);
++ return 0;
++}
++
++int32_t
+ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+@@ -2026,6 +2060,7 @@ cs_notify(xlator_t *this, int event, void *data, ...)
+
+ struct xlator_fops cs_fops = {
+ .stat = cs_stat,
++ .readdirp = cs_readdirp,
+ .truncate = cs_truncate,
+ .seek = cs_seek,
+ .statfs = cs_statfs,
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 065fced..2c19ce1 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -5472,6 +5472,8 @@ posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+ continue;
+ }
+
++ posix_update_iatt_buf(&stbuf, -1, hpath, dict);
++
+ if (!inode)
+ inode = inode_find(itable, stbuf.ia_gfid);
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch b/SOURCES/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch
new file mode 100644
index 0000000..1a73388
--- /dev/null
+++ b/SOURCES/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch
@@ -0,0 +1,94 @@
+From d8c98e9785e652692d928a2efbbb571703f728b0 Mon Sep 17 00:00:00 2001
+From: Anuradha Talur
+Date: Wed, 24 Apr 2019 12:35:08 -0700
+Subject: [PATCH 156/169] cloudsync: Fix bug in cloudsync-fops-c.py
+
+In some of the fops generated by generator.py, the xdata request
+was not being wound to the child xlator correctly.
+
+This was happening because, even though the logic in
+cloudsync-fops-c.py was correct, generator.py was generating
+resultant code that omitted this logic.
+
+Made changes in cloudsync-fops-c.py so that the correct code is
+produced.
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/22617/
+
+> Change-Id: I6f25bdb36ede06fd03be32c04087a75639d79150
+> updates: bz#1642168
+> Signed-off-by: Anuradha Talur
+
+Change-Id: I87cc71e98c2c6cec78a6e84850fc8d82f8dd4dfd
+Signed-off-by: Susant Palai
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172195
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ xlators/features/cloudsync/src/cloudsync-fops-c.py | 24 +++++++++++++++++++---
+ 1 file changed, 21 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+index 8878b70..c444ea6 100755
+--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py
++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+@@ -39,7 +39,15 @@ cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ else
+ state = GF_CS_LOCAL;
+
+- local->xattr_req = xdata ? dict_ref (xdata) : (xdata = dict_new ());
++ xdata = xdata ? dict_ref (xdata) : dict_new ();
++
++ if (!xdata) {
++ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ local->xattr_req = xdata;
+
+ ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+@@ -187,19 +195,29 @@ int32_t
+ cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+ {
++ int op_errno = EINVAL;
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = cs_local_init (this, frame, loc, NULL, GF_FOP_@UPNAME@);
+ if (!local) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local is NULL");
++ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (loc->inode->ia_type == IA_IFDIR)
+ goto wind;
+
+- local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
++ xdata = xdata ? dict_ref (xdata) : dict_new ();
++
++ if (!xdata) {
++ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ local->xattr_req = xdata;
+
+ ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+@@ -215,7 +233,7 @@ wind:
+
+ return 0;
+ err:
+- CS_STACK_UNWIND (@NAME@, frame, -1, errno, @CBK_ERROR_ARGS@);
++ CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@);
+
+ return 0;
+ }
+--
+1.8.3.1
+
diff --git a/SOURCES/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch b/SOURCES/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch
new file mode 100644
index 0000000..185a24a
--- /dev/null
+++ b/SOURCES/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch
@@ -0,0 +1,68 @@
+From 4a72ac20f728aa5c3141359ff89f1b61d4cd210a Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC
+Date: Fri, 17 May 2019 23:03:35 +0530
+Subject: [PATCH 157/169] afr/frame: Destroy frame after
+ afr_selfheal_entry_granular
+
+In function "afr_selfheal_entry_granular", after completing the
+heal we are not destroying the frame. This will lead to a crash
+when we execute a statedump operation, which tries to access the
+xlator object. If this xlator object is freed as part of the
+graph destroy, this will lead to an invalid memory access.
+
+Upstream patch: https://review.gluster.org/22743
+
+>Change-Id: I0a5e78e704ef257c3ac0087eab2c310e78fbe36d
+>fixes: bz#1708926
+>Signed-off-by: Mohammed Rafi KC
+
+Change-Id: I326354008e6d98376c8333d270f2f80036ad07f0
+BUG: 1716626
+Signed-off-by: Mohammed Rafi KC
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172282
+Reviewed-by: Atin Mukherjee
+Tested-by: RHGS Build Bot
+---
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index fc09b4c..a6890fa 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -832,6 +832,8 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ subvol = priv->children[subvol_idx];
+
+ args.frame = afr_copy_frame(frame);
++ if (!args.frame)
++ goto out;
+ args.xl = this;
+ /* args.heal_fd represents the fd associated with the original directory
+ * on which entry heal is being attempted.
+@@ -850,9 +852,10 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ * do not treat heal as failure. 
+ */ + if (is_src) +- return -errno; ++ ret = -errno; + else +- return 0; ++ ret = 0; ++ goto out; + } + + ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args, +@@ -862,7 +865,9 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd, + + if (args.mismatch == _gf_true) + ret = -1; +- ++out: ++ if (args.frame) ++ AFR_STACK_DESTROY(args.frame); + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch b/SOURCES/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch new file mode 100644 index 0000000..d12e81d --- /dev/null +++ b/SOURCES/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch @@ -0,0 +1,162 @@ +From 11b64d494c52004002f900888694d20ef8af6df6 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Sat, 11 May 2019 22:40:22 +0530 +Subject: [PATCH 158/169] glusterfsd/cleanup: Protect graph object under a lock + +While processing a cleanup_and_exit function, we are +accessing a graph object. But this has not been protected +under a lock. Because a parallel cleanup of a graph is quite +possible which might lead to an invalid memory access + +Upstream patch:https://review.gluster.org/#/c/glusterfs/+/22709/ + +>Change-Id: Id05ca70d5b57e172b0401d07b6a1f5386c044e79 +>fixes: bz#1708926 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: I55ab0525c79baa99a3bd929ee979c5519be5ab21 +BUG: 1716626 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/172283 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + libglusterfs/src/graph.c | 58 +++++++++++++++---------- + libglusterfs/src/statedump.c | 16 +++++-- + tests/bugs/glusterd/optimized-basic-testcases.t | 4 +- + 3 files changed, 50 insertions(+), 28 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 4c8b02d..18fb2d9 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1392,8 +1392,12 @@ glusterfs_graph_cleanup(void *arg) + } + pthread_mutex_unlock(&ctx->notify_lock); + +- glusterfs_graph_fini(graph); +- glusterfs_graph_destroy(graph); ++ pthread_mutex_lock(&ctx->cleanup_lock); ++ { ++ glusterfs_graph_fini(graph); ++ glusterfs_graph_destroy(graph); ++ } ++ pthread_mutex_unlock(&ctx->cleanup_lock); + out: + return NULL; + } +@@ -1468,31 +1472,37 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + + if (!ctx || !ctx->active || !volfile_obj) + goto out; +- parent_graph = ctx->active; +- graph = volfile_obj->graph; +- if (!graph) +- goto out; +- if (graph->first) +- xl = graph->first; + +- last_xl = graph->last_xl; +- if (last_xl) +- last_xl->next = NULL; +- if (!xl || xl->cleanup_starting) +- goto out; ++ pthread_mutex_lock(&ctx->cleanup_lock); ++ { ++ parent_graph = ctx->active; ++ graph = volfile_obj->graph; ++ if (!graph) ++ goto unlock; ++ if (graph->first) ++ xl = graph->first; ++ ++ last_xl = graph->last_xl; ++ if (last_xl) ++ last_xl->next = NULL; ++ if (!xl || xl->cleanup_starting) ++ goto unlock; + +- xl->cleanup_starting = 1; +- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, +- "detaching child %s", volfile_obj->vol_id); ++ xl->cleanup_starting = 1; ++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, ++ "detaching child %s", volfile_obj->vol_id); + +- list_del_init(&volfile_obj->volfile_list); +- glusterfs_mux_xlator_unlink(parent_graph->top, xl); +- parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); +- parent_graph->xl_count -= graph->xl_count; +- 
+- default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
+- parent_graph->id++;
+- ret = 0;
++ list_del_init(&volfile_obj->volfile_list);
++ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
++ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
++ parent_graph->xl_count -= graph->xl_count;
++ parent_graph->leaf_count -= graph->leaf_count;
++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
++ parent_graph->id++;
++ ret = 0;
++ }
++unlock:
++ pthread_mutex_unlock(&ctx->cleanup_lock);
+ out:
+ if (!ret) {
+ list_del_init(&volfile_obj->volfile_list);
+diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
+index 0cf80c0..0d58f8f 100644
+--- a/libglusterfs/src/statedump.c
++++ b/libglusterfs/src/statedump.c
+@@ -805,11 +805,17 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+ int brick_count = 0;
+ int len = 0;
+
+- gf_proc_dump_lock();
+-
+ if (!ctx)
+ goto out;
+
++ /*
++ * Multiplexed daemons can change the active graph when attach/detach
++ * is called. So this has to be protected with the cleanup lock.
++ */
++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
++ pthread_mutex_lock(&ctx->cleanup_lock);
++ gf_proc_dump_lock();
++
+ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) &&
+ (ctx && ctx->active)) {
+ top = ctx->active->first;
+@@ -923,7 +929,11 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+ out:
+ GF_FREE(dump_options.dump_path);
+ dump_options.dump_path = NULL;
+- gf_proc_dump_unlock();
++ if (ctx) {
++ gf_proc_dump_unlock();
++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
++ pthread_mutex_unlock(&ctx->cleanup_lock);
++ }
+
+ return;
+ }
+diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
+index d700b5e..110f1b9 100644
+--- a/tests/bugs/glusterd/optimized-basic-testcases.t
++++ b/tests/bugs/glusterd/optimized-basic-testcases.t
+@@ -289,7 +289,9 @@ mkdir -p /xyz/var/lib/glusterd/abc
+ TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc
+ EXPECT 'Created' volinfo_field "test" 'Status';
+
+-EXPECT "1" generate_statedump_and_check_for_glusterd_info
++#While taking a statedump, there is a TRY_LOCK on call_frame, which may cause
++#failure. So adding an EXPECT_WITHIN.
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" generate_statedump_and_check_for_glusterd_info
+
+ cleanup_statedump `pidof glusterd`
+ cleanup
+--
+1.8.3.1
+
diff --git a/SOURCES/0159-glusterd-add-an-op-version-check.patch b/SOURCES/0159-glusterd-add-an-op-version-check.patch
new file mode 100644
index 0000000..323ae95
--- /dev/null
+++ b/SOURCES/0159-glusterd-add-an-op-version-check.patch
@@ -0,0 +1,66 @@
+From bd087c3d2766b81b25ea7bbe425b55023fd12545 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde
+Date: Wed, 15 May 2019 07:35:45 +0530
+Subject: [PATCH 159/169] glusterd: add an op-version check
+
+Problem: "gluster v status" hangs in a heterogeneous cluster
+when issued from a non-upgraded node.
+
+Cause: commit 34e010d64 fixes the txn-opinfo memory leak
+in the op-sm framework by not setting the txn-opinfo if some
+conditions are true. When vol status is issued from a
+non-upgraded node, the command hangs on its upgraded peer,
+as the upgraded node sets the txn-opinfo based on the new
+conditions whereas non-upgraded nodes follow different
+conditions.
+
+Fix: Add an op-version check, so that all the nodes follow the
+same set of conditions when setting txn-opinfo. 
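+
+As an editorial illustration only (not code from this patch), the
+gating pattern the fix relies on can be reduced to the standalone C
+sketch below. OP_VERSION_6_0, cluster_conf and must_store_txn_opinfo
+are simplified stand-ins for GD_OP_VERSION_6_0, glusterd_conf_t and
+the branch in glusterd_op_sm(); the numeric values are made up:
+
+    #include <stdio.h>
+
+    #define OP_VERSION_6_0 60000 /* stand-in for GD_OP_VERSION_6_0 */
+
+    struct cluster_conf {
+        int op_version; /* effective (lowest) op-version in the cluster */
+    };
+
+    /* Below 6.0, always store the txn-opinfo so that upgraded and
+     * non-upgraded peers take the same branch; from 6.0 on, skip the
+     * store for staged skip-locking transactions (the leak fix). */
+    static int
+    must_store_txn_opinfo(const struct cluster_conf *conf,
+                          int staged_skip_locking)
+    {
+        if (conf->op_version < OP_VERSION_6_0)
+            return 1;
+        return !staged_skip_locking;
+    }
+
+    int
+    main(void)
+    {
+        struct cluster_conf mixed = {30000};    /* pre-6.0 peer present */
+        struct cluster_conf upgraded = {60000}; /* all peers on 6.0 */
+
+        printf("mixed cluster stores opinfo: %d\n",
+               must_store_txn_opinfo(&mixed, 1));
+        printf("upgraded cluster stores opinfo: %d\n",
+               must_store_txn_opinfo(&upgraded, 1));
+        return 0;
+    }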
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22730/
+
+BUG: 1707246
+
+> fixes: bz#1710159
+> Change-Id: Ie1f353212c5931ddd1b728d2e6949dfe6225c4ab
+> Signed-off-by: Sanju Rakonde
+
+Change-Id: Ie1f353212c5931ddd1b728d2e6949dfe6225c4ab
+Signed-off-by: Sanju Rakonde
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172307
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 94a5e1f..d0c1a2c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -8158,9 +8158,12 @@ glusterd_op_sm()
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ xlator_t *this = NULL;
+ glusterd_op_info_t txn_op_info;
++ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
++ priv = this->private;
++ GF_ASSERT(priv);
+
+ ret = synclock_trylock(&gd_op_sm_lock);
+ if (ret) {
+@@ -8238,7 +8241,8 @@ glusterd_op_sm()
+ "Unable to clear "
+ "transaction's opinfo");
+ } else {
+- if (!(event_type == GD_OP_EVENT_STAGE_OP &&
++ if ((priv->op_version < GD_OP_VERSION_6_0) ||
++ !(event_type == GD_OP_EVENT_STAGE_OP &&
+ opinfo.state.state == GD_OP_STATE_STAGED &&
+ opinfo.skip_locking)) {
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+--
+1.8.3.1
+
diff --git a/SOURCES/0160-geo-rep-Geo-rep-help-text-issue.patch b/SOURCES/0160-geo-rep-Geo-rep-help-text-issue.patch
new file mode 100644
index 0000000..efba5a4
--- /dev/null
+++ b/SOURCES/0160-geo-rep-Geo-rep-help-text-issue.patch
@@ -0,0 +1,41 @@
+From 77df6b8930fd4acf3d0c38220fa4317ee97d530f Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya
+Date: Thu, 9 May 2019 10:43:01 +0530
+Subject: [PATCH 160/169] geo-rep: Geo-rep help text issue
+
+Modified the Geo-rep help text for better clarity.
+
+>fixes: bz#1652887
+>Change-Id: I40ef7ef709eaecf0125ab4b4a7517e2c5d1ef4a0
+>Signed-off-by: Shwetha K Acharya
+
+backport of https://review.gluster.org/#/c/glusterfs/+/22689/
+
+BUG: 1480907
+Change-Id: I40ef7ef709eaecf0125ab4b4a7517e2c5d1ef4a0
+Signed-off-by: Shwetha K Acharya
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172316
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ cli/src/cli-cmd-volume.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 3432dbe..564aef7 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -3422,8 +3422,8 @@ struct cli_cmd volume_cmds[] = {
+ "reset all the reconfigured options"},
+
+ #if (SYNCDAEMON_COMPILE)
+- {"volume " GEOREP " [] [] {\\\n create [[ssh-port n] "
+- "[[no-verify] | [push-pem]]] [force] \\\n"
++ {"volume " GEOREP " [] []::[] {"
++ "\\\n create [[ssh-port n] [[no-verify] \\\n | [push-pem]]] [force] \\\n"
+ " | start [force] \\\n | stop [force] \\\n | pause [force] \\\n | resume "
+ "[force] \\\n"
+ " | config [[[\\!]